Learning Embeddings into Entropic Wasserstein Spaces. Frogner, C., Mirzazadeh, F., & Solomon, J. (2019). arXiv:1905.03329.
Euclidean embeddings of data are fundamentally limited in their ability to capture latent semantic structures, which need not conform to Euclidean spatial assumptions. Here we consider an alternative, which embeds data as discrete probability distributions in a Wasserstein space, endowed with an optimal transport metric. Wasserstein spaces are much larger and more flexible than Euclidean spaces, in that they can successfully embed a wider variety of metric structures. We exploit this flexibility by learning an embedding that captures semantic information in the Wasserstein distance between embedded distributions. We examine empirically the representational capacity of our learned Wasserstein embeddings, showing that they can embed a wide variety of metric structures with smaller distortion than an equivalent Euclidean embedding. We also investigate an application to word embedding, demonstrating a unique advantage of Wasserstein embeddings: We can visualize the high-dimensional embedding directly, since it is a probability distribution on a low-dimensional space. This obviates the need for dimensionality reduction techniques like t-SNE for visualization.
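The abstract's key object is the entropy-regularized Wasserstein distance between discrete distributions, which is what makes these embeddings differentiable and cheap to compute. As a hedged illustration (not the paper's code), a minimal Sinkhorn iteration over two small point clouds might look like this; the function name, point counts, and regularization value `eps` are assumptions for the sketch:

```python
import numpy as np

def sinkhorn_distance(x, y, a, b, eps=0.1, n_iters=200):
    """Entropy-regularized Wasserstein distance between two discrete
    distributions: support points x (n, d) with weights a, and
    y (m, d) with weights b. Plain Sinkhorn iterations; eps controls
    the entropic regularization strength."""
    # Pairwise squared-Euclidean cost matrix, shape (n, m).
    C = ((x[:, None, :] - y[None, :, :]) ** 2).sum(-1)
    K = np.exp(-C / eps)               # Gibbs kernel
    u = np.ones_like(a)
    for _ in range(n_iters):           # alternate marginal projections
        v = b / (K.T @ u)
        u = a / (K @ v)
    P = u[:, None] * K * v[None, :]    # entropic transport plan
    return (P * C).sum()               # transport cost under the plan

# Two uniform 3-point distributions in the plane; note the support
# lives in a low-dimensional space, which is what makes the embedding
# directly visualizable, as the abstract points out.
rng = np.random.default_rng(0)
x = rng.normal(size=(3, 2))
y = x + 0.5                            # shifted copy of the same cloud
a = b = np.ones(3) / 3
d = sinkhorn_distance(x, y, a, b)
```

In practice one would use a vetted implementation (e.g. the POT library's `ot.sinkhorn`) with log-domain stabilization; the loop above is only the textbook fixed-point form.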
@article{frognerLearningEmbeddingsEntropic2019,
  archivePrefix = {arXiv},
  eprinttype = {arxiv},
  eprint = {1905.03329},
  primaryClass = {cs, stat},
  title = {Learning {{Embeddings}} into {{Entropic Wasserstein Spaces}}},
  url = {http://arxiv.org/abs/1905.03329},
  abstract = {Euclidean embeddings of data are fundamentally limited in their ability to capture latent semantic structures, which need not conform to Euclidean spatial assumptions. Here we consider an alternative, which embeds data as discrete probability distributions in a Wasserstein space, endowed with an optimal transport metric. Wasserstein spaces are much larger and more flexible than Euclidean spaces, in that they can successfully embed a wider variety of metric structures. We exploit this flexibility by learning an embedding that captures semantic information in the Wasserstein distance between embedded distributions. We examine empirically the representational capacity of our learned Wasserstein embeddings, showing that they can embed a wide variety of metric structures with smaller distortion than an equivalent Euclidean embedding. We also investigate an application to word embedding, demonstrating a unique advantage of Wasserstein embeddings: We can visualize the high-dimensional embedding directly, since it is a probability distribution on a low-dimensional space. This obviates the need for dimensionality reduction techniques like t-SNE for visualization.},
  urldate = {2019-05-10},
  date = {2019-05-08},
  keywords = {Statistics - Machine Learning,Computer Science - Machine Learning},
  author = {Frogner, Charlie and Mirzazadeh, Farzaneh and Solomon, Justin},
}
