var bibbase_data = {"data":"\"Loading..\"\n\n
\n\n \n\n \n\n \n \n\n \n\n \n \n\n \n\n \n
\n generated by\n \n \"bibbase.org\"\n\n \n
\n \n\n
\n\n \n\n\n
\n\n Excellent! Next you can\n create a new website with this list, or\n embed it in an existing web page by copying & pasting\n any of the following snippets.\n\n
\n JavaScript\n (easiest)\n
\n \n <script src=\"https://bibbase.org/show?bib=https://antoninschrab.github.io/files/aschrab-publications.bib&jsonp=1&nocache=1&theme=default&jsonp=1\"></script>\n \n
\n\n PHP\n
\n \n <?php\n $contents = file_get_contents(\"https://bibbase.org/show?bib=https://antoninschrab.github.io/files/aschrab-publications.bib&jsonp=1&nocache=1&theme=default\");\n print_r($contents);\n ?>\n \n
\n\n iFrame\n (not recommended)\n
\n \n <iframe src=\"https://bibbase.org/show?bib=https://antoninschrab.github.io/files/aschrab-publications.bib&jsonp=1&nocache=1&theme=default\"></iframe>\n \n
\n\n

\n For more details see the documention.\n

\n
\n
\n\n
\n\n This is a preview! To use this list on your own web site\n or create a new web site from it,\n create a free account. The file will be added\n and you will be able to edit it in the File Manager.\n We will show you instructions once you've created your account.\n
\n\n
\n\n

To the site owner:

\n\n

Action required! Mendeley is changing its\n API. In order to keep using Mendeley with BibBase past April\n 14th, you need to:\n

    \n
  1. renew the authorization for BibBase on Mendeley, and
  2. \n
  3. update the BibBase URL\n in your page the same way you did when you initially set up\n this page.\n
  4. \n
\n

\n\n

\n \n \n Fix it now\n

\n
\n\n
\n\n\n
\n \n \n
\n
\n  \n 2024\n \n \n (1)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n Practical Kernel Tests of Conditional Independence.\n \n \n \n \n\n\n \n Pogodin, R.; Schrab, A.; Li, Y.; Sutherland, D. J; and Gretton, A.\n\n\n \n\n\n\n 2024.\n Arxiv preprint 2402.13196.\n\n\n\n
\n\n\n\n \n \n \"PracticalPaper\n  \n \n \n \"Practical code 1\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@unpublished{pogodin2024practical,\n  title={Practical Kernel Tests of Conditional Independence},\n  author={Pogodin, Roman and Schrab, Antonin and Li, Yazhe and Sutherland, Danica J and Gretton, Arthur},\n  keywords = {Conditional independence testing},\n  year={2024},\n  note = "Arxiv preprint 2402.13196.",\n  abstract = {We describe a data-efficient, kernel-based approach to statistical testing of conditional independence. A major challenge of conditional independence testing, absent in tests of unconditional independence, is to obtain the correct test level (the specified upper bound on the rate of false positives), while still attaining competitive test power. Excess false positives arise due to bias in the test statistic, which is obtained using nonparametric kernel ridge regression. We propose three methods for bias control to correct the test level, based on data splitting, auxiliary data, and (where possible) simpler function classes. We show these combined strategies are effective both for synthetic and real-world data.},\n  url = {https://arxiv.org/abs/2402.13196},\n  url_Code_1 = {https://github.com/romanpogodin/kernel-ci-testing},\n  eprint = {2402.13196},\n  archivePrefix = {arXiv},\n  primaryClass = {stat.ML}\n}\n
\n
\n\n\n
\n We describe a data-efficient, kernel-based approach to statistical testing of conditional independence. A major challenge of conditional independence testing, absent in tests of unconditional independence, is to obtain the correct test level (the specified upper bound on the rate of false positives), while still attaining competitive test power. Excess false positives arise due to bias in the test statistic, which is obtained using nonparametric kernel ridge regression. We propose three methods for bias control to correct the test level, based on data splitting, auxiliary data, and (where possible) simpler function classes. We show these combined strategies are effective both for synthetic and real-world data.\n
\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2023\n \n \n (3)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n MMD Aggregated Two-Sample Test.\n \n \n \n \n\n\n \n Schrab, A.; Kim, I.; Albert, M.; Laurent, B.; Guedj, B.; and Gretton, A.\n\n\n \n\n\n\n Journal of Machine Learning Research, 24(194): 1–81. 2023.\n \n\n\n\n
\n\n\n\n \n \n \"MMDPaper\n  \n \n \n \"MMD pdf\n  \n \n \n \"MMD code 1\n  \n \n \n \"MMD code 2\n  \n \n \n \"MMD slides 1\n  \n \n \n \"MMD slides 2\n  \n \n \n \"MMD poster 1\n  \n \n \n \"MMD poster 2\n  \n \n \n \"MMD video 1\n  \n \n \n \"MMD video 2\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 21 downloads\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{schrab2021mmd,\n  title = {{MMD} Aggregated Two-Sample Test},\n  author = {Antonin Schrab and Ilmun Kim and M{\\'e}lisande Albert and B{\\'e}atrice Laurent and Benjamin Guedj and Arthur Gretton},\n  journal = {Journal of Machine Learning Research},\n  year = {2023},\n  volume = {24},\n  number = {194},\n  pages = {1--81},\n  abstract = {We propose two novel nonparametric two-sample kernel tests based on the Maximum Mean Discrepancy (MMD). First, for a fixed kernel, we construct an MMD test using either permutations or a wild bootstrap, two popular numerical procedures to determine the test threshold. We prove that this test controls the probability of type I error non-asymptotically. Hence, it can be used reliably even in settings with small sample sizes as it remains well-calibrated, which differs from previous MMD tests which only guarantee correct test level asymptotically. When the difference in densities lies in a Sobolev ball, we prove minimax optimality of our MMD test with a specific kernel depending on the smoothness parameter of the Sobolev ball. In practice, this parameter is unknown and, hence, the optimal MMD test with this particular kernel cannot be used. To overcome this issue, we construct an aggregated test, called MMDAgg, which is adaptive to the smoothness parameter. The test power is maximised over the collection of kernels used, without requiring held-out data for kernel selection (which results in a loss of test power), or arbitrary kernel choices such as the median heuristic. We prove that MMDAgg still controls the level non-asymptotically, and achieves the minimax rate over Sobolev balls, up to an iterated logarithmic term. Our guarantees are not restricted to a specific type of kernel, but hold for any product of one-dimensional translation invariant characteristic kernels. We provide a user-friendly parameter-free implementation of MMDAgg using an adaptive collection of bandwidths. We demonstrate that MMDAgg significantly outperforms alternative state-of-the-art MMD-based two-sample tests on synthetic data satisfying the Sobolev smoothness assumption, and that, on real-world image data, MMDAgg closely matches the power of tests leveraging the use of models such as neural networks.},\n  keywords = {Two-sample testing},\n  url = {http://jmlr.org/papers/v24/21-1289.html},\n  url_PDF = {https://jmlr.org/papers/volume24/21-1289/21-1289.pdf},\n  url_Code_1 = {https://github.com/antoninschrab/mmdagg},\n  url_Code_2 = {https://github.com/antoninschrab/mmdagg-paper},\n  url_Slides_1 = {https://antoninschrab.github.io/files/Slides_MMDAgg_KSDAgg_long.pdf},\n  url_Slides_2 = {https://antoninschrab.github.io/files/Slides_handout-31-05-22.pdf},\n  url_Poster_1 = {https://antoninschrab.github.io/files/Poster_MMDAgg_KSDAgg.pdf},\n  url_Poster_2 = {https://antoninschrab.github.io/files/Poster_MMDAgg.pdf},\n  url_Video_1 = {https://youtu.be/F0VOCrAf5_M},\n  url_Video_2 = {https://youtu.be/OWh6Hj10wsY},\n  eprint = {2110.15073},\n  archivePrefix = {arXiv},\n  primaryClass = {stat.ML}\n}\n\n
\n
\n\n\n
\n We propose two novel nonparametric two-sample kernel tests based on the Maximum Mean Discrepancy (MMD). First, for a fixed kernel, we construct an MMD test using either permutations or a wild bootstrap, two popular numerical procedures to determine the test threshold. We prove that this test controls the probability of type I error non-asymptotically. Hence, it can be used reliably even in settings with small sample sizes as it remains well-calibrated, which differs from previous MMD tests which only guarantee correct test level asymptotically. When the difference in densities lies in a Sobolev ball, we prove minimax optimality of our MMD test with a specific kernel depending on the smoothness parameter of the Sobolev ball. In practice, this parameter is unknown and, hence, the optimal MMD test with this particular kernel cannot be used. To overcome this issue, we construct an aggregated test, called MMDAgg, which is adaptive to the smoothness parameter. The test power is maximised over the collection of kernels used, without requiring held-out data for kernel selection (which results in a loss of test power), or arbitrary kernel choices such as the median heuristic. We prove that MMDAgg still controls the level non-asymptotically, and achieves the minimax rate over Sobolev balls, up to an iterated logarithmic term. Our guarantees are not restricted to a specific type of kernel, but hold for any product of one-dimensional translation invariant characteristic kernels. We provide a user-friendly parameter-free implementation of MMDAgg using an adaptive collection of bandwidths. We demonstrate that MMDAgg significantly outperforms alternative state-of-the-art MMD-based two-sample tests on synthetic data satisfying the Sobolev smoothness assumption, and that, on real-world image data, MMDAgg closely matches the power of tests leveraging the use of models such as neural networks.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n MMD-FUSE: Learning and Combining Kernels for Two-Sample Testing Without Data Splitting.\n \n \n \n \n\n\n \n Biggs, F.; Schrab, A.; and Gretton, A.\n\n\n \n\n\n\n Advances in Neural Information Processing Systems, 36. 2023.\n Spotlight at NeurIPS 2023.\n\n\n\n
\n\n\n\n \n \n \"MMD-FUSE:Paper\n  \n \n \n \"MMD-FUSE: pdf\n  \n \n \n \"MMD-FUSE: code 1\n  \n \n \n \"MMD-FUSE: code 2\n  \n \n \n \"MMD-FUSE: slides\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 18 downloads\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{biggs2023mmdfuse,\n  author        = {Biggs, Felix and Schrab, Antonin and Gretton, Arthur},\n  title         = {{MMD-FUSE}: {L}earning and Combining Kernels for Two-Sample Testing Without Data Splitting},\n  year          = {2023},\n  journal       = {Advances in Neural Information Processing Systems},\n  volume        = {36},\n  note = "Spotlight at NeurIPS 2023.",\n  abstract = {We propose novel statistics which maximise the power of a two-sample test based on the Maximum Mean Discrepancy (MMD), by adapting over the set of kernels used in defining it. For finite sets, this reduces to combining (normalised) MMD values under each of these kernels via a weighted soft maximum. Exponential concentration bounds are proved for our proposed statistics under the null and alternative. We further show how these kernels can be chosen in a data-dependent but permutation-independent way, in a well-calibrated test, avoiding data splitting. This technique applies more broadly to general permutation-based MMD testing, and includes the use of deep kernels with features learnt using unsupervised models such as auto-encoders. We highlight the applicability of our MMD-FUSE test on both synthetic low-dimensional and real-world high-dimensional data, and compare its performance in terms of power against current state-of-the-art kernel tests.},\n  keywords = {Two-sample testing},\n  url = {https://arxiv.org/abs/2306.08777},\n  url_PDF = {https://arxiv.org/pdf/2306.08777.pdf},\n  url_Code_1 = {https://github.com/antoninschrab/mmdfuse},\n  url_Code_2 = {https://github.com/antoninschrab/mmdfuse-paper},\n  url_Slides = {https://antoninschrab.github.io/files/Slides_MMDFUSE.pdf},\n  eprint = {2306.08777},\n  archivePrefix = {arXiv},\n  primaryClass = {stat.ML}\n}\n\n
\n
\n\n\n
\n We propose novel statistics which maximise the power of a two-sample test based on the Maximum Mean Discrepancy (MMD), by adapting over the set of kernels used in defining it. For finite sets, this reduces to combining (normalised) MMD values under each of these kernels via a weighted soft maximum. Exponential concentration bounds are proved for our proposed statistics under the null and alternative. We further show how these kernels can be chosen in a data-dependent but permutation-independent way, in a well-calibrated test, avoiding data splitting. This technique applies more broadly to general permutation-based MMD testing, and includes the use of deep kernels with features learnt using unsupervised models such as auto-encoders. We highlight the applicability of our MMD-FUSE test on both synthetic low-dimensional and real-world high-dimensional data, and compare its performance in terms of power against current state-of-the-art kernel tests.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Differentially Private Permutation Tests: Applications to Kernel Methods.\n \n \n \n \n\n\n \n Kim, I.; and Schrab, A.\n\n\n \n\n\n\n 2023.\n Arxiv preprint 2310.19043.\n\n\n\n
\n\n\n\n \n \n \"DifferentiallyPaper\n  \n \n \n \"Differentially code 1\n  \n \n \n \"Differentially code 2\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 1 download\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@unpublished{kim2023differentially,\ntitle={Differentially Private Permutation Tests: {A}pplications to Kernel Methods}, \nauthor={Ilmun Kim and Antonin Schrab},\nyear={2023},\nnote = "Arxiv preprint 2310.19043.",\nabstract = {Recent years have witnessed growing concerns about the privacy of sensitive data. In response to these concerns, differential privacy has emerged as a rigorous framework for privacy protection, gaining widespread recognition in both academic and industrial circles. While substantial progress has been made in private data analysis, existing methods often suffer from impracticality or a significant loss of statistical efficiency. This paper aims to alleviate these concerns in the context of hypothesis testing by introducing differentially private permutation tests. The proposed framework extends classical non-private permutation tests to private settings, maintaining both finite-sample validity and differential privacy in a rigorous manner. The power of the proposed test depends on the choice of a test statistic, and we establish general conditions for consistency and non-asymptotic uniform power. To demonstrate the utility and practicality of our framework, we focus on reproducing kernel-based test statistics and introduce differentially private kernel tests for two-sample and independence testing: dpMMD and dpHSIC. The proposed kernel tests are straightforward to implement, applicable to various types of data, and attain minimax optimal power across different privacy regimes. Our empirical evaluations further highlight their competitive power under various synthetic and real-world scenarios, emphasizing their practical value. The code is publicly available to facilitate the implementation of our framework.},\nkeywords = {Differential privacy, Two-sample testing, Independence testing},\nurl = {https://arxiv.org/abs/2310.19043},\nurl_Code_1 = {https://github.com/antoninschrab/dpkernel},\nurl_Code_2 = {https://github.com/antoninschrab/dpkernel-paper},\neprint={2310.19043},\narchivePrefix={arXiv},\nprimaryClass={math.ST}\n}\n\n
\n
\n\n\n
\n Recent years have witnessed growing concerns about the privacy of sensitive data. In response to these concerns, differential privacy has emerged as a rigorous framework for privacy protection, gaining widespread recognition in both academic and industrial circles. While substantial progress has been made in private data analysis, existing methods often suffer from impracticality or a significant loss of statistical efficiency. This paper aims to alleviate these concerns in the context of hypothesis testing by introducing differentially private permutation tests. The proposed framework extends classical non-private permutation tests to private settings, maintaining both finite-sample validity and differential privacy in a rigorous manner. The power of the proposed test depends on the choice of a test statistic, and we establish general conditions for consistency and non-asymptotic uniform power. To demonstrate the utility and practicality of our framework, we focus on reproducing kernel-based test statistics and introduce differentially private kernel tests for two-sample and independence testing: dpMMD and dpHSIC. The proposed kernel tests are straightforward to implement, applicable to various types of data, and attain minimax optimal power across different privacy regimes. Our empirical evaluations further highlight their competitive power under various synthetic and real-world scenarios, emphasizing their practical value. The code is publicly available to facilitate the implementation of our framework.\n
\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2022\n \n \n (3)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n KSD Aggregated Goodness-of-fit Test.\n \n \n \n \n\n\n \n Schrab, A.; Guedj, B.; and Gretton, A.\n\n\n \n\n\n\n In Oh, A. H.; Agarwal, A.; Belgrave, D.; and Cho, K., editor(s), Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, 2022. \n \n\n\n\n
\n\n\n\n \n \n \"KSDPaper\n  \n \n \n \"KSD pdf\n  \n \n \n \"KSD supplemental\n  \n \n \n \"KSD code 1\n  \n \n \n \"KSD code 2\n  \n \n \n \"KSD slides 1\n  \n \n \n \"KSD slides 2\n  \n \n \n \"KSD slides 3\n  \n \n \n \"KSD poster 1\n  \n \n \n \"KSD poster 2\n  \n \n \n \"KSD poster 3\n  \n \n \n \"KSD video 1\n  \n \n \n \"KSD video 2\n  \n \n \n \"KSD video 3\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 58 downloads\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@inproceedings{schrab2022ksd,\n  title = {{KSD} Aggregated Goodness-of-fit Test},\n  author = {Antonin Schrab and Benjamin Guedj and Arthur Gretton},\n  booktitle = {Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022},\n  editor = {Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\n  year = {2022},\n  abstract = {We investigate properties of goodness-of-fit tests based on the Kernel Stein Discrepancy (KSD). We introduce a strategy to construct a test, called KSDAgg, which aggregates multiple tests with different kernels. KSDAgg avoids splitting the data to perform kernel selection (which leads to a loss in test power), and rather maximises the test power over a collection of kernels. We provide theoretical guarantees on the power of KSDAgg: we show it achieves the smallest uniform separation rate of the collection, up to a logarithmic term. KSDAgg can be computed exactly in practice as it relies either on a parametric bootstrap or on a wild bootstrap to estimate the quantiles and the level corrections. In particular, for the crucial choice of bandwidth of a fixed kernel, it avoids resorting to arbitrary heuristics (such as median or standard deviation) or to data splitting. We find on both synthetic and real-world data that KSDAgg outperforms other state-of-the-art adaptive KSD-based goodness-of-fit testing procedures.},\n  keywords = {Goodness-of-fit testing},\n  url = {https://proceedings.neurips.cc/paper_files/paper/2022/hash/d241a7b1499cee1bf40769ceade2444d-Abstract-Conference.html},\n  url_PDF = {https://proceedings.neurips.cc/paper_files/paper/2022/file/d241a7b1499cee1bf40769ceade2444d-Paper-Conference.pdf},\n  url_Supplemental = {https://proceedings.neurips.cc/paper_files/paper/2022/file/d241a7b1499cee1bf40769ceade2444d-Supplemental-Conference.pdf},\n  url_Code_1 = {https://github.com/antoninschrab/ksdagg},\n  url_Code_2 = {https://github.com/antoninschrab/ksdagg-paper},\n  url_Slides_1 = {https://antoninschrab.github.io/files/Slides_MMDAgg_KSDAgg_long.pdf},\n  url_Slides_2 = {https://antoninschrab.github.io/files/Slides_handout-31-05-22.pdf},\n  url_Slides_3 = {https://nips.cc/media/neurips-2022/Slides/54932.pdf},\n  url_Poster_1 = {https://nips.cc/media/PosterPDFs/NeurIPS%202022/54932.png?t=1669384001.1315906},\n  url_Poster_2 = {https://antoninschrab.github.io/files/Poster_MMDAgg_KSDAgg.pdf},\n  url_Poster_3 = {https://antoninschrab.github.io/files/Poster-03-09-22.pdf},\n  url_Video_1 = {https://nips.cc/virtual/2022/poster/54932},\n  url_Video_2 = {https://youtu.be/F0VOCrAf5_M},\n  url_Video_3 = {https://youtu.be/OWh6Hj10wsY},\n  eprint = {2202.00824},\n  archivePrefix = {arXiv},\n  primaryClass = {stat.ML}\n}\n\n
\n
\n\n\n
\n We investigate properties of goodness-of-fit tests based on the Kernel Stein Discrepancy (KSD). We introduce a strategy to construct a test, called KSDAgg, which aggregates multiple tests with different kernels. KSDAgg avoids splitting the data to perform kernel selection (which leads to a loss in test power), and rather maximises the test power over a collection of kernels. We provide theoretical guarantees on the power of KSDAgg: we show it achieves the smallest uniform separation rate of the collection, up to a logarithmic term. KSDAgg can be computed exactly in practice as it relies either on a parametric bootstrap or on a wild bootstrap to estimate the quantiles and the level corrections. In particular, for the crucial choice of bandwidth of a fixed kernel, it avoids resorting to arbitrary heuristics (such as median or standard deviation) or to data splitting. We find on both synthetic and real-world data that KSDAgg outperforms other state-of-the-art adaptive KSD-based goodness-of-fit testing procedures.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Efficient Aggregated Kernel Tests using Incomplete $U$-statistics.\n \n \n \n \n\n\n \n Schrab, A.; Kim, I.; Guedj, B.; and Gretton, A.\n\n\n \n\n\n\n In Oh, A. H.; Agarwal, A.; Belgrave, D.; and Cho, K., editor(s), Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, 2022. \n \n\n\n\n
\n\n\n\n \n \n \"EfficientPaper\n  \n \n \n \"Efficient pdf\n  \n \n \n \"Efficient supplemental\n  \n \n \n \"Efficient code 1\n  \n \n \n \"Efficient code 2\n  \n \n \n \"Efficient slides 1\n  \n \n \n \"Efficient slides 2\n  \n \n \n \"Efficient poster 1\n  \n \n \n \"Efficient poster 2\n  \n \n \n \"Efficient video\n  \n \n\n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 9 downloads\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n \n \n \n \n\n\n\n
\n
@inproceedings{schrab2022efficient,\n  title = {Efficient Aggregated Kernel Tests using Incomplete {$U$}-statistics},\n  author = {Antonin Schrab and Ilmun Kim and Benjamin Guedj and Arthur Gretton},\n  booktitle = {Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022},\n  editor = {Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},\n  year = {2022},\n  abstract = {We propose a series of computationally efficient, nonparametric tests for the two-sample, independence and goodness-of-fit problems, using the Maximum Mean Discrepancy (MMD), Hilbert Schmidt Independence Criterion (HSIC), and Kernel Stein Discrepancy (KSD), respectively. Our test statistics are incomplete $U$-statistics, with a computational cost that interpolates between linear time in the number of samples, and quadratic time, as associated with classical $U$-statistic tests. The three proposed tests aggregate over several kernel bandwidths to detect departures from the null on various scales: we call the resulting tests MMDAggInc, HSICAggInc and KSDAggInc. For the test thresholds, we derive a quantile bound for wild bootstrapped incomplete $U$- statistics, which is of independent interest. We derive uniform separation rates for MMDAggInc and HSICAggInc, and quantify exactly the trade-off between computational efficiency and the attainable rates: this result is novel for tests based on incomplete $U$-statistics, to our knowledge. We further show that in the quadratic-time case, the wild bootstrap incurs no penalty to test power over more widespread permutation-based approaches, since both attain the same minimax optimal rates (which in turn match the rates that use oracle quantiles). We support our claims with numerical experiments on the trade-off between computational efficiency and test power. In the three testing frameworks, we observe that our proposed linear-time aggregated tests obtain higher power than current state-of-the-art linear-time kernel tests.},\n  keywords = {Two-sample testing, Independence testing, Goodness-of-fit testing},\n  url = {https://proceedings.neurips.cc/paper_files/paper/2022/hash/774164b966cc277c82a960934445140d-Abstract-Conference.html},\n  url_PDF = {https://proceedings.neurips.cc/paper_files/paper/2022/file/774164b966cc277c82a960934445140d-Paper-Conference.pdf},\n  url_Supplemental = {https://proceedings.neurips.cc/paper_files/paper/2022/file/774164b966cc277c82a960934445140d-Supplemental-Conference.pdf},\n  url_Code_1 = {https://github.com/antoninschrab/agginc},\n  url_Code_2 = {https://github.com/antoninschrab/agginc-paper},\n  url_Slides_1 = {https://antoninschrab.github.io/files/Slides_handout-31-05-22.pdf},\n  url_Slides_2 = {https://nips.cc/media/neurips-2022/Slides/54933.pdf},\n  url_Poster_1 = {https://nips.cc/media/PosterPDFs/NeurIPS%202022/54933.png?t=1669384934.1906412},\n  url_Poster_2 = {https://antoninschrab.github.io/files/Poster-03-09-22.pdf},\n  url_Video = {https://nips.cc/virtual/2022/poster/54933},\n  eprint = {2206.09194},\n  archivePrefix = {arXiv},\n  primaryClass = {stat.ML}\n}\n\n
\n
\n\n\n
\n We propose a series of computationally efficient, nonparametric tests for the two-sample, independence and goodness-of-fit problems, using the Maximum Mean Discrepancy (MMD), Hilbert Schmidt Independence Criterion (HSIC), and Kernel Stein Discrepancy (KSD), respectively. Our test statistics are incomplete $U$-statistics, with a computational cost that interpolates between linear time in the number of samples, and quadratic time, as associated with classical $U$-statistic tests. The three proposed tests aggregate over several kernel bandwidths to detect departures from the null on various scales: we call the resulting tests MMDAggInc, HSICAggInc and KSDAggInc. For the test thresholds, we derive a quantile bound for wild bootstrapped incomplete $U$- statistics, which is of independent interest. We derive uniform separation rates for MMDAggInc and HSICAggInc, and quantify exactly the trade-off between computational efficiency and the attainable rates: this result is novel for tests based on incomplete $U$-statistics, to our knowledge. We further show that in the quadratic-time case, the wild bootstrap incurs no penalty to test power over more widespread permutation-based approaches, since both attain the same minimax optimal rates (which in turn match the rates that use oracle quantiles). We support our claims with numerical experiments on the trade-off between computational efficiency and test power. In the three testing frameworks, we observe that our proposed linear-time aggregated tests obtain higher power than current state-of-the-art linear-time kernel tests.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Discussion of `Multiscale Fisher's Independence Test for Multivariate Dependence'.\n \n \n \n \n\n\n \n Schrab, A.; Jitkrittum, W.; Szabó, Z.; Sejdinovic, D.; and Gretton, A.\n\n\n \n\n\n\n Biometrika, 109(3): 597-603. 08 2022.\n \n\n\n\n
\n\n\n\n \n \n \"DiscussionPaper\n  \n \n \n \"Discussion arxiv\n  \n \n\n \n \n doi\n  \n \n\n \n link\n  \n \n\n bibtex\n \n\n \n  \n \n abstract \n \n\n \n  \n \n 7 downloads\n \n \n\n \n \n \n \n \n \n \n\n  \n \n \n \n \n\n\n\n
\n
@article{schrab2022discussion,\n  author = {Antonin Schrab and Wittawat Jitkrittum and Zolt\\'an Szab\\'o and Dino Sejdinovic and Arthur Gretton},\n  title = {Discussion of `{M}ultiscale {F}isher's Independence Test for Multivariate Dependence'},\n  journal = {Biometrika},\n  volume = {109},\n  number = {3},\n  pages = {597-603},\n  year = {2022},\n  month = {08},\n  issn = {1464-3510},\n  doi = {10.1093/biomet/asac028},\n  url = {https://doi.org/10.1093/biomet/asac028},\n  eprint = {https://academic.oup.com/biomet/article-pdf/109/3/597/45512180/asac028.pdf},\n  url_arXiv = {https://arxiv.org/pdf/2206.11142},\n  abstract = {We discuss how MultiFIT, the Multiscale Fisher's Independence Test for Multivariate Dependence proposed by Gorsky and Ma (2022), compares to existing linear-time kernel tests based on the Hilbert-Schmidt independence criterion (HSIC). We highlight the fact that the levels of the kernel tests at any finite sample size can be controlled exactly, as it is the case with the level of MultiFIT. In our experiments, we observe some of the performance limitations of MultiFIT in terms of test power.},\n  keywords = {Independence testing},\n  eprint = {2206.11142},\n  archivePrefix = {arXiv},\n  primaryClass = {stat.ME}\n}\n\n
\n
\n\n\n
\n We discuss how MultiFIT, the Multiscale Fisher's Independence Test for Multivariate Dependence proposed by Gorsky and Ma (2022), compares to existing linear-time kernel tests based on the Hilbert-Schmidt independence criterion (HSIC). We highlight the fact that the levels of the kernel tests at any finite sample size can be controlled exactly, as it is the case with the level of MultiFIT. In our experiments, we observe some of the performance limitations of MultiFIT in terms of test power.\n
\n\n\n
\n\n\n\n\n\n
\n
\n\n\n\n\n
\n\n\n \n\n \n \n \n \n\n
\n"}; document.write(bibbase_data.data);