An Empirical Evaluation of doc2vec with Practical Insights into Document Embedding Generation. Lau, J. H. & Baldwin, T. Paper Website abstract bibtex Recently, Le and Mikolov (2014) proposed doc2vec as an extension to word2vec (Mikolov et al., 2013a) to learn document-level embeddings. Despite promising results in the original paper, others have struggled to reproduce those results. This paper presents a rigorous empirical evaluation of doc2vec over two tasks. We compare doc2vec to two baselines and two state-of-the-art document embedding methodologies. We found that doc2vec performs robustly when using models trained on large external corpora, and can be further improved by using pre-trained word embeddings. We also provide recommendations on hyper-parameter settings for general-purpose applications, and release source code to induce document embeddings using our trained doc2vec models.
@article{lau2016empirical,
  author = {Lau, Jey Han and Baldwin, Timothy},
  title = {An Empirical Evaluation of doc2vec with Practical Insights into Document Embedding Generation},
  year = {2016},
  eprint = {1607.05368},
  archiveprefix = {arXiv},
  url = {https://arxiv.org/pdf/1607.05368.pdf},
  abstract = {Recently, Le and Mikolov (2014) proposed doc2vec as an extension to word2vec (Mikolov et al., 2013a) to learn document-level embeddings. Despite promising results in the original paper, others have struggled to reproduce those results. This paper presents a rigorous empirical evaluation of doc2vec over two tasks. We compare doc2vec to two baselines and two state-of-the-art document embedding methodologies. We found that doc2vec performs robustly when using models trained on large external corpora, and can be further improved by using pre-trained word embeddings. We also provide recommendations on hyper-parameter settings for general-purpose applications, and release source code to induce document embeddings using our trained doc2vec models.},
  type = {article},
  websites = {https://arxiv.org/pdf/1607.05368.pdf},
  id = {98a86971-36fd-3a46-8792-d6a98100e135},
  created = {2018-02-05T19:16:26.305Z},
  accessed = {2018-02-05},
  file_attached = {true},
  profile_id = {371589bb-c770-37ff-8193-93c6f25ffeb1},
  group_id = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},
  last_modified = {2018-02-05T19:16:28.364Z},
  read = {false},
  starred = {false},
  authored = {false},
  confirmed = {false},
  hidden = {false},
  private_publication = {false},
  bibtype = {article}
}
Downloads: 0
{"_id":"hwegPPsjzf5nSwtKM","bibbaseid":"lau-baldwin-anempiricalevaluationofdoc2vecwithpracticalinsightsintodocumentembeddinggeneration","downloads":0,"creationDate":"2018-02-07T16:22:57.346Z","title":"An Empirical Evaluation of doc2vec with Practical Insights into Document Embedding Generation","author_short":["Lau, J., H.","Baldwin, T."],"year":null,"bibtype":"article","biburl":null,"bibdata":{"title":"An Empirical Evaluation of doc2vec with Practical Insights into Document Embedding Generation","type":"article","websites":"https://arxiv.org/pdf/1607.05368.pdf","id":"98a86971-36fd-3a46-8792-d6a98100e135","created":"2018-02-05T19:16:26.305Z","accessed":"2018-02-05","file_attached":"true","profile_id":"371589bb-c770-37ff-8193-93c6f25ffeb1","group_id":"f982cd63-7ceb-3aa2-ac7e-a953963d6716","last_modified":"2018-02-05T19:16:28.364Z","read":false,"starred":false,"authored":false,"confirmed":false,"hidden":false,"private_publication":false,"abstract":"Recently, Le and Mikolov (2014) pro-posed doc2vec as an extension to word2vec (Mikolov et al., 2013a) to learn document-level embeddings. De-spite promising results in the original pa-per, others have struggled to reproduce those results. This paper presents a rig-orous empirical evaluation of doc2vec over two tasks. We compare doc2vec to two baselines and two state-of-the-art document embedding methodologies. We found that doc2vec performs robustly when using models trained on large ex-ternal corpora, and can be further im-proved by using pre-trained word embed-dings. 
We also provide recommendations on hyper-parameter settings for general-purpose applications, and release source code to induce document embeddings us-ing our trained doc2vec models.","bibtype":"article","author":"Lau, Jey Han and Baldwin, Timothy","bibtex":"@article{\n title = {An Empirical Evaluation of doc2vec with Practical Insights into Document Embedding Generation},\n type = {article},\n websites = {https://arxiv.org/pdf/1607.05368.pdf},\n id = {98a86971-36fd-3a46-8792-d6a98100e135},\n created = {2018-02-05T19:16:26.305Z},\n accessed = {2018-02-05},\n file_attached = {true},\n profile_id = {371589bb-c770-37ff-8193-93c6f25ffeb1},\n group_id = {f982cd63-7ceb-3aa2-ac7e-a953963d6716},\n last_modified = {2018-02-05T19:16:28.364Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {false},\n hidden = {false},\n private_publication = {false},\n abstract = {Recently, Le and Mikolov (2014) pro-posed doc2vec as an extension to word2vec (Mikolov et al., 2013a) to learn document-level embeddings. De-spite promising results in the original pa-per, others have struggled to reproduce those results. This paper presents a rig-orous empirical evaluation of doc2vec over two tasks. We compare doc2vec to two baselines and two state-of-the-art document embedding methodologies. We found that doc2vec performs robustly when using models trained on large ex-ternal corpora, and can be further im-proved by using pre-trained word embed-dings. 
We also provide recommendations on hyper-parameter settings for general-purpose applications, and release source code to induce document embeddings us-ing our trained doc2vec models.},\n bibtype = {article},\n author = {Lau, Jey Han and Baldwin, Timothy}\n}","author_short":["Lau, J., H.","Baldwin, T."],"urls":{"Paper":"http://bibbase.org/service/mendeley/371589bb-c770-37ff-8193-93c6f25ffeb1/file/3408e7b6-68e9-2c5d-09c5-7e28887fa83f/An_Empirical_Evaluation_of_doc2vec_with_Practical_Insights_into_Document_Embedding_Generation.pdf.pdf","Website":"https://arxiv.org/pdf/1607.05368.pdf"},"bibbaseid":"lau-baldwin-anempiricalevaluationofdoc2vecwithpracticalinsightsintodocumentembeddinggeneration","role":"author","downloads":0},"search_terms":["empirical","evaluation","doc2vec","practical","insights","document","embedding","generation","lau","baldwin"],"keywords":[],"authorIDs":[]}