doi abstract bibtex

Distance covariance and distance correlation have been widely adopted in measuring dependence of a pair of random variables or random vectors. If the computation of distance covariance and distance correlation is implemented directly accordingly to its definition then its computational complexity is $O(n^2)$ which is a disadvantage compared to other faster methods. In this paper we show that the computation of distance covariance and distance correlation of real valued random variables can be implemented by an $O(n \log n)$ algorithm and this is comparable to other computationally efficient algorithms. The new formula we derive for an unbiased estimator for squared distance covariance turns out to be a U-statistic. This fact implies some nice asymptotic properties that were derived before via more complex methods. We apply the fast computing algorithm to some synthetic data. Our work will make distance correlation applicable to a much wider class of problems. A supplementary file to this article includes a Matlab and C based software that realizes the proposed algorithm.

@comment{
  Journal article record for Huo and Szekely, Fast Computing for Distance Covariance.
  The exported record carried a placeholder pages value of 0, which has been
  removed so styles do not print a bogus page number. Volume, issue and the
  real page range are not recorded here; TODO: fill them in from the DOI record.
  The lccn field holds the source database identifier, presumably not a
  Library of Congress number; it is kept verbatim.
}
@article{huoFastComputingDistance2015,
  author   = {Huo, Xiaoming and Sz{\'e}kely, G{\'a}bor J.},
  title    = {Fast Computing for Distance Covariance},
  journal  = {Technometrics},
  year     = {2015},
  month    = jun,
  issn     = {1537-2723},
  doi      = {10.1080/00401706.2015.1054435},
  abstract = {Distance covariance and distance correlation have been widely adopted in measuring dependence of a pair of random variables or random vectors. If the computation of distance covariance and distance correlation is implemented directly accordingly to its definition then its computational complexity is $O(n^2)$ which is a disadvantage compared to other faster methods. In this paper we show that the computation of distance covariance and distance correlation of real valued random variables can be implemented by an $O(n \log n)$ algorithm and this is comparable to other computationally efficient algorithms. The new formula we derive for an unbiased estimator for squared distance covariance turns out to be a U-statistic. This fact implies some nice asymptotic properties that were derived before via more complex methods. We apply the fast computing algorithm to some synthetic data. Our work will make distance correlation applicable to a much wider class of problems. A supplementary file to this article includes a Matlab and C based software that realizes the proposed algorithm.},
  keywords = {*imported-from-citeulike-INRMM,~INRMM-MiD:c-14091371,~to-add-doi-URL,array-programming,distance-correlation,featured-publication,free-scientific-knowledge,free-software,license,license--cc-by-3-0,nonlinear-correlation,open-science,statistics},
  lccn     = {INRMM-MiD:c-14091371},
}

Downloads: 0