Latin BERT: A Contextual Language Model for Classical Philology. Bamman, D. & Burns, P. J. September, 2020. arXiv:2009.10053 [cs]
Paper abstract bibtex We present Latin BERT, a contextual language model for the Latin language, trained on 642.7 million words from a variety of sources spanning the Classical era to the 21st century. In a series of case studies, we illustrate the affordances of this language-specific model both for work in natural language processing for Latin and in using computational methods for traditional scholarship: we show that Latin BERT achieves a new state of the art for part-of-speech tagging on all three Universal Dependency datasets for Latin and can be used for predicting missing text (including critical emendations); we create a new dataset for assessing word sense disambiguation for Latin and demonstrate that Latin BERT outperforms static word embeddings; and we show that it can be used for semantically-informed search by querying contextual nearest neighbors. We publicly release trained models to help drive future work in this space.
@misc{bamman_latin_2020,
title = {Latin {BERT}: {A} {Contextual} {Language} {Model} for {Classical} {Philology}},
shorttitle = {Latin {BERT}},
url = {http://arxiv.org/abs/2009.10053},
abstract = {We present Latin BERT, a contextual language model for the Latin language, trained on 642.7 million words from a variety of sources spanning the Classical era to the 21st century. In a series of case studies, we illustrate the affordances of this language-specific model both for work in natural language processing for Latin and in using computational methods for traditional scholarship: we show that Latin BERT achieves a new state of the art for part-of-speech tagging on all three Universal Dependency datasets for Latin and can be used for predicting missing text (including critical emendations); we create a new dataset for assessing word sense disambiguation for Latin and demonstrate that Latin BERT outperforms static word embeddings; and we show that it can be used for semantically-informed search by querying contextual nearest neighbors. We publicly release trained models to help drive future work in this space.},
language = {en},
urldate = {2023-08-26},
publisher = {arXiv},
author = {Bamman, David and Burns, Patrick J.},
month = sep,
year = {2020},
note = {arXiv:2009.10053 [cs]},
keywords = {Computer Science - Computation and Language},
}
Downloads: 0
{"_id":"wwgESBWRhJyfkXK9w","bibbaseid":"bamman-burns-latinbertacontextuallanguagemodelforclassicalphilology-2020","author_short":["Bamman, D.","Burns, P. J."],"bibdata":{"bibtype":"misc","type":"misc","title":"Latin BERT: A Contextual Language Model for Classical Philology","shorttitle":"Latin BERT","url":"http://arxiv.org/abs/2009.10053","abstract":"We present Latin BERT, a contextual language model for the Latin language, trained on 642.7 million words from a variety of sources spanning the Classical era to the 21st century. In a series of case studies, we illustrate the affordances of this language-specific model both for work in natural language processing for Latin and in using computational methods for traditional scholarship: we show that Latin BERT achieves a new state of the art for part-of-speech tagging on all three Universal Dependency datasets for Latin and can be used for predicting missing text (including critical emendations); we create a new dataset for assessing word sense disambiguation for Latin and demonstrate that Latin BERT outperforms static word embeddings; and we show that it can be used for semantically-informed search by querying contextual nearest neighbors. We publicly release trained models to help drive future work in this space.","language":"en","urldate":"2023-08-26","publisher":"arXiv","author":[{"propositions":[],"lastnames":["Bamman"],"firstnames":["David"],"suffixes":[]},{"propositions":[],"lastnames":["Burns"],"firstnames":["Patrick","J."],"suffixes":[]}],"month":"September","year":"2020","note":"arXiv:2009.10053 [cs]","keywords":"Computer Science - Computation and Language","bibtex":"@misc{bamman_latin_2020,\n\ttitle = {Latin {BERT}: {A} {Contextual} {Language} {Model} for {Classical} {Philology}},\n\tshorttitle = {Latin {BERT}},\n\turl = {http://arxiv.org/abs/2009.10053},\n\tabstract = {We present Latin BERT, a contextual language model for the Latin language, trained on 642.7 million words from a variety of sources spanning the Classical era to the 21st century. In a series of case studies, we illustrate the affordances of this language-specific model both for work in natural language processing for Latin and in using computational methods for traditional scholarship: we show that Latin BERT achieves a new state of the art for part-of-speech tagging on all three Universal Dependency datasets for Latin and can be used for predicting missing text (including critical emendations); we create a new dataset for assessing word sense disambiguation for Latin and demonstrate that Latin BERT outperforms static word embeddings; and we show that it can be used for semantically-informed search by querying contextual nearest neighbors. We publicly release trained models to help drive future work in this space.},\n\tlanguage = {en},\n\turldate = {2023-08-26},\n\tpublisher = {arXiv},\n\tauthor = {Bamman, David and Burns, Patrick J.},\n\tmonth = sep,\n\tyear = {2020},\n\tnote = {arXiv:2009.10053 [cs]},\n\tkeywords = {Computer Science - Computation and Language},\n}\n\n\n\n","author_short":["Bamman, D.","Burns, P. J."],"key":"bamman_latin_2020","id":"bamman_latin_2020","bibbaseid":"bamman-burns-latinbertacontextuallanguagemodelforclassicalphilology-2020","role":"author","urls":{"Paper":"http://arxiv.org/abs/2009.10053"},"keyword":["Computer Science - Computation and Language"],"metadata":{"authorlinks":{}}},"bibtype":"misc","biburl":"https://bibbase.org/zotero-group/schulzkx/5158478","dataSources":["JFDnASMkoQCjjGL8E"],"keywords":["computer science - computation and language"],"search_terms":["latin","bert","contextual","language","model","classical","philology","bamman","burns"],"title":"Latin BERT: A Contextual Language Model for Classical Philology","year":2020}