doi abstract bibtex

Perfect phylogeny is one of the fundamental models for studying evolution. We investigate the following variant of the model: The input is a species-characters matrix. The characters are binary and directed; i.e., a species can only gain characters. The difference from standard perfect phylogeny is that for some species the states of some characters are unknown. The question is whether one can complete the missing states in a way that admits a perfect phylogeny. The problem arises in classical phylogenetic studies, when some states are missing or undetermined. Quite recently, studies that infer phylogenies using inserted repeat elements in DNA gave rise to the same problem. Extant solutions for it take time $O(n^2m)$ for n species and m characters. We provide a graph theoretic formulation of the problem as a graph sandwich problem, and give near-optimal $Õ(nm)$-time algorithms for the problem. We also study the problem of finding a single, general solution tree, from which any other solution can be obtained by node splitting. We provide an algorithm to construct such a tree, or determine that none exists.

@Article{peer04incomplete, author = {Itsik Pe'er and Tal Pupko and Ron Shamir and Roded Sharan}, title = {Incomplete Directed Perfect Phylogeny}, journal = {SIAM J Comput}, year = {2004}, volume = {33}, number = {3}, pages = {590--607}, abstract = {Perfect phylogeny is one of the fundamental models for studying evolution. We investigate the following variant of the model: The input is a species-characters matrix. The characters are binary and directed; i.e., a species can only gain characters. The difference from standard perfect phylogeny is that for some species the states of some characters are unknown. The question is whether one can complete the missing states in a way that admits a perfect phylogeny. The problem arises in classical phylogenetic studies, when some states are missing or undetermined. Quite recently, studies that infer phylogenies using inserted repeat elements in DNA gave rise to the same problem. Extant solutions for it take time $O(n^2m)$ for n species and m characters. We provide a graph theoretic formulation of the problem as a graph sandwich problem, and give near-optimal $\tilde{O}(nm)$-time algorithms for the problem. We also study the problem of finding a single, general solution tree, from which any other solution can be obtained by node splitting. We provide an algorithm to construct such a tree, or determine that none exists.}, doi = {10.1137/S0097539702406510}, keywords = {perfect phylogeny, phylogeny, flip supertrees, supertrees}, }

Downloads: 0