Paper doi abstract bibtex

Given a set of strings, the problem of finding a string that minimises its distance to the set is directly related with problems frequently encountered in areas involving Pattern recognition or computational biology. Based on the Levenshtein (or edit) distance, different definitions of distances between a string and a set of strings can be adopted. In particular, if this definition is the sum of the distances to each string of the set, the string that minimises this distance is the (generalised) median string. Finding this string corresponds in speech recognition to giving a model for a set of acoustic sequences, and in computational biology to constructing an optimal evolutionary tree when the given phylogeny is a star. Only efficient algorithms are known for finding approximate solutions. The results in this paper are combinatorial and negative. We prove that computing the median string corresponds to a NP-complete decision problems, thus proving that this problem is NP-hard.

@article{ higuera_topology_2000, title = {Topology of strings: {Median} string is {NP}-complete}, volume = {230}, issn = {0304-3975}, shorttitle = {Topology of strings}, url = {http://www.sciencedirect.com/science/article/pii/S0304397597002405}, doi = {10.1016/S0304-3975(97)00240-5}, abstract = {Given a set of strings, the problem of finding a string that minimises its distance to the set is directly related with problems frequently encountered in areas involving Pattern recognition or computational biology. Based on the Levenshtein (or edit) distance, different definitions of distances between a string and a set of strings can be adopted. In particular, if this definition is the sum of the distances to each string of the set, the string that minimises this distance is the (generalised) median string. Finding this string corresponds in speech recognition to giving a model for a set of acoustic sequences, and in computational biology to constructing an optimal evolutionary tree when the given phylogeny is a star. Only efficient algorithms are known for finding approximate solutions. The results in this paper are combinatorial and negative. We prove that computing the median string corresponds to a NP-complete decision problems, thus proving that this problem is NP-hard.}, number = {1–2}, urldate = {2015-04-21TZ}, journal = {Theoretical Computer Science}, author = {Higuera, Colin de la and Casacuberta, F.}, month = {January}, year = {2000}, note = {00093}, keywords = {Levenshtein distance, Multiple sequence alignment, NP-complete problems, String-to-string correction, generalised Median}, pages = {39--48} }

Downloads: 0