Efficient Estimation of Word Representations in Vector Space

Efficient Estimation of Word Representations in Vector Space. Mikolov, T., Chen, K., Corrado, G., & Dean, J.

We propose two novel model architectures for computing continuous vector representations of words from very large data sets. The quality of these representations is measured in a word similarity task, and the results are compared to the previously best performing techniques based on different types of neural networks. We observe large improvements in accuracy at much lower computational cost, i.e. it takes less than a day to learn high quality word vectors from a 1.6 billion words data set. Furthermore, we show that these vectors provide state-of-the-art performance on our test set for measuring syntactic and semantic word similarities.

@comment{
  arXiv preprint 1301.3781 (the word2vec paper). Bibliographic fields come
  first; the trailing block (id, created, profile_id, ...) is reference-manager
  bookkeeping carried over from the export and is ignored by BibTeX styles.
  No journal is given because the record only points at arXiv; the eprint
  fields identify the preprint instead.
}
@article{mikolov2013efficient,
  author              = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey},
  title               = {Efficient Estimation of Word Representations in Vector Space},
  year                = {2013},
  eprint              = {1301.3781},
  archiveprefix       = {arXiv},
  primaryclass        = {cs.CL},
  url                 = {https://arxiv.org/pdf/1301.3781.pdf},
  abstract            = {We propose two novel model architectures for computing continuous vector representations of words from very large data sets. The quality of these representations is measured in a word similarity task, and the results are compared to the previously best performing techniques based on different types of neural networks. We observe large improvements in accuracy at much lower computational cost, i.e. it takes less than a day to learn high quality word vectors from a 1.6 billion words data set. Furthermore, we show that these vectors provide state-of-the-art performance on our test set for measuring syntactic and semantic word similarities.},
  type                = {article},
  bibtype             = {article},
  websites            = {https://arxiv.org/pdf/1301.3781.pdf},
  id                  = {8373a06c-2b11-3602-a0e8-7cdfe7cadd29},
  created             = {2018-02-05T18:46:24.278Z},
  accessed            = {2018-02-05},
  file_attached       = {true},
  profile_id          = {371589bb-c770-37ff-8193-93c6f25ffeb1},
  group_id            = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},
  last_modified       = {2018-02-05T18:46:26.161Z},
  read                = {false},
  starred             = {false},
  authored            = {false},
  confirmed           = {false},
  hidden              = {false},
  private_publication = {false},
}

Downloads: 0

{"_id":"aT26mrnf3D6nuScre","bibbaseid":"mikolov-chen-corrado-dean-efficientestimationofwordrepresentationsinvectorspace","downloads":0,"creationDate":"2018-02-07T16:22:57.312Z","title":"Efficient Estimation of Word Representations in Vector Space","author_short":["Mikolov, T.","Chen, K.","Corrado, G.","Dean, J."],"year":null,"bibtype":"article","biburl":null,"bibdata":{"title":"Efficient Estimation of Word Representations in Vector Space","type":"article","websites":"https://arxiv.org/pdf/1301.3781.pdf","id":"8373a06c-2b11-3602-a0e8-7cdfe7cadd29","created":"2018-02-05T18:46:24.278Z","accessed":"2018-02-05","file_attached":"true","profile_id":"371589bb-c770-37ff-8193-93c6f25ffeb1","group_id":"f982cd63-7ceb-3aa2-ac7e-a953963d6716","last_modified":"2018-02-05T18:46:26.161Z","read":false,"starred":false,"authored":false,"confirmed":false,"hidden":false,"private_publication":false,"abstract":"We propose two novel model architectures for computing continuous vector repre-sentations of words from very large data sets. The quality of these representations is measured in a word similarity task, and the results are compared to the previ-ously best performing techniques based on different types of neural networks. We observe large improvements in accuracy at much lower computational cost, i.e. it takes less than a day to learn high quality word vectors from a 1.6 billion words data set. 
Furthermore, we show that these vectors provide state-of-the-art perfor-mance on our test set for measuring syntactic and semantic word similarities.","bibtype":"article","author":"Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey","bibtex":"@article{\n title = {Efficient Estimation of Word Representations in Vector Space},\n type = {article},\n websites = {https://arxiv.org/pdf/1301.3781.pdf},\n id = {8373a06c-2b11-3602-a0e8-7cdfe7cadd29},\n created = {2018-02-05T18:46:24.278Z},\n accessed = {2018-02-05},\n file_attached = {true},\n profile_id = {371589bb-c770-37ff-8193-93c6f25ffeb1},\n group_id = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},\n last_modified = {2018-02-05T18:46:26.161Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {false},\n hidden = {false},\n private_publication = {false},\n abstract = {We propose two novel model architectures for computing continuous vector repre-sentations of words from very large data sets. The quality of these representations is measured in a word similarity task, and the results are compared to the previ-ously best performing techniques based on different types of neural networks. We observe large improvements in accuracy at much lower computational cost, i.e. it takes less than a day to learn high quality word vectors from a 1.6 billion words data set. 
Furthermore, we show that these vectors provide state-of-the-art perfor-mance on our test set for measuring syntactic and semantic word similarities.},\n bibtype = {article},\n author = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey}\n}","author_short":["Mikolov, T.","Chen, K.","Corrado, G.","Dean, J."],"urls":{"Paper":"http://bibbase.org/service/mendeley/371589bb-c770-37ff-8193-93c6f25ffeb1/file/4e727d1d-72a7-be83-4487-d0681ae4ec33/Efficient_Estimation_of_Word_Representations_in_Vector_Space.pdf.pdf","Website":"https://arxiv.org/pdf/1301.3781.pdf"},"bibbaseid":"mikolov-chen-corrado-dean-efficientestimationofwordrepresentationsinvectorspace","role":"author","downloads":0},"search_terms":["efficient","estimation","word","representations","vector","space","mikolov","chen","corrado","dean"],"keywords":[],"authorIDs":[]}