Machine Learning for Ancient Languages: A Survey. Sommerschield, T., Assael, Y., Pavlopoulos, J., Stefanak, V., Senior, A., Dyer, C., Bodel, J., Prag, J., Androutsopoulos, I., & de Freitas, N. Computational Linguistics, August, 2023. Paper doi abstract bibtex Ancient languages preserve the cultures and histories of the past. However, their study is fraught with difficulties, and experts must tackle a range of challenging text-based tasks, from deciphering lost languages to restoring damaged inscriptions, to determining the authorship of works of literature. Technological aids have long supported the study of ancient texts, but in recent years advances in artificial intelligence and machine learning have enabled analyses on a scale and in a detail that are reshaping the field of humanities, similarly to how microscopes and telescopes have contributed to the realm of science. This article aims to provide a comprehensive survey of published research using machine learning for the study of ancient texts written in any language, script, and medium, spanning over three and a half millennia of civilizations around the ancient world. To analyze the relevant literature, we introduce a taxonomy of tasks inspired by the steps involved in the study of ancient documents: digitization, restoration, attribution, linguistic analysis, textual criticism, translation, and decipherment. This work offers three major contributions: first, mapping the interdisciplinary field carved out by the synergy between the humanities and machine learning; second, highlighting how active collaboration between specialists from both fields is key to producing impactful and compelling scholarship; third, highlighting promising directions for future work in this field. Thus, this work promotes and supports the continued collaborative impetus between the humanities and machine learning.
@comment{Journal article entry. Author names use the "von Last, First" form so that a lowercase particle such as "de" is parsed as part of the surname instead of as a given name.}
@article{sommerschield_machine_2023,
  author     = {Sommerschield, Thea and Assael, Yannis and Pavlopoulos, John and Stefanak, Vanessa and Senior, Andrew and Dyer, Chris and Bodel, John and Prag, Jonathan and Androutsopoulos, Ion and de Freitas, Nando},
  title      = {Machine {Learning} for {Ancient} {Languages}: {A} {Survey}},
  shorttitle = {Machine {Learning} for {Ancient} {Languages}},
  journal    = {Computational Linguistics},
  year       = {2023},
  month      = aug,
  pages      = {1--45},
  issn       = {0891-2017},
  doi        = {10.1162/coli_a_00481},
  url        = {https://doi.org/10.1162/coli_a_00481},
  urldate    = {2023-09-15},
  abstract   = {Ancient languages preserve the cultures and histories of the past. However, their study is fraught with difficulties, and experts must tackle a range of challenging text-based tasks, from deciphering lost languages to restoring damaged inscriptions, to determining the authorship of works of literature. Technological aids have long supported the study of ancient texts, but in recent years advances in artificial intelligence and machine learning have enabled analyses on a scale and in a detail that are reshaping the field of humanities, similarly to how microscopes and telescopes have contributed to the realm of science. This article aims to provide a comprehensive survey of published research using machine learning for the study of ancient texts written in any language, script, and medium, spanning over three and a half millennia of civilizations around the ancient world. To analyze the relevant literature, we introduce a taxonomy of tasks inspired by the steps involved in the study of ancient documents: digitization, restoration, attribution, linguistic analysis, textual criticism, translation, and decipherment. This work offers three major contributions: first, mapping the interdisciplinary field carved out by the synergy between the humanities and machine learning; second, highlighting how active collaboration between specialists from both fields is key to producing impactful and compelling scholarship; third, highlighting promising directions for future work in this field. Thus, this work promotes and supports the continued collaborative impetus between the humanities and machine learning.},
}
Downloads: 0
{"_id":"BTy3Ztyyff2yJXcja","bibbaseid":"sommerschield-assael-pavlopoulos-stefanak-senior-dyer-bodel-prag-etal-machinelearningforancientlanguagesasurvey-2023","author_short":["Sommerschield, T.","Assael, Y.","Pavlopoulos, J.","Stefanak, V.","Senior, A.","Dyer, C.","Bodel, J.","Prag, J.","Androutsopoulos, I.","Freitas, N. d."],"bibdata":{"bibtype":"article","type":"article","title":"Machine Learning for Ancient Languages: A Survey","issn":"0891-2017","shorttitle":"Machine Learning for Ancient Languages","url":"https://doi.org/10.1162/coli_a_00481","doi":"10.1162/coli_a_00481","abstract":"Ancient languages preserve the cultures and histories of the past. However, their study is fraught with difficulties, and experts must tackle a range of challenging text-based tasks, from deciphering lost languages to restoring damaged inscriptions, to determining the authorship of works of literature. Technological aids have long supported the study of ancient texts, but in recent years advances in artificial intelligence and machine learning have enabled analyses on a scale and in a detail that are reshaping the field of humanities, similarly to how microscopes and telescopes have contributed to the realm of science. This article aims to provide a comprehensive survey of published research using machine learning for the study of ancient texts written in any language, script, and medium, spanning over three and a half millennia of civilizations around the ancient world. To analyze the relevant literature, we introduce a taxonomy of tasks inspired by the steps involved in the study of ancient documents: digitization, restoration, attribution, linguistic analysis, textual criticism, translation, and decipherment. 
This work offers three major contributions: first, mapping the interdisciplinary field carved out by the synergy between the humanities and machine learning; second, highlighting how active collaboration between specialists from both fields is key to producing impactful and compelling scholarship; third, highlighting promising directions for future work in this field. Thus, this work promotes and supports the continued collaborative impetus between the humanities and machine learning.","urldate":"2023-09-15","journal":"Computational Linguistics","author":[{"propositions":[],"lastnames":["Sommerschield"],"firstnames":["Thea"],"suffixes":[]},{"propositions":[],"lastnames":["Assael"],"firstnames":["Yannis"],"suffixes":[]},{"propositions":[],"lastnames":["Pavlopoulos"],"firstnames":["John"],"suffixes":[]},{"propositions":[],"lastnames":["Stefanak"],"firstnames":["Vanessa"],"suffixes":[]},{"propositions":[],"lastnames":["Senior"],"firstnames":["Andrew"],"suffixes":[]},{"propositions":[],"lastnames":["Dyer"],"firstnames":["Chris"],"suffixes":[]},{"propositions":[],"lastnames":["Bodel"],"firstnames":["John"],"suffixes":[]},{"propositions":[],"lastnames":["Prag"],"firstnames":["Jonathan"],"suffixes":[]},{"propositions":[],"lastnames":["Androutsopoulos"],"firstnames":["Ion"],"suffixes":[]},{"propositions":[],"lastnames":["Freitas"],"firstnames":["Nando","de"],"suffixes":[]}],"month":"August","year":"2023","pages":"1–45","bibtex":"@article{sommerschield_machine_2023,\n\ttitle = {Machine {Learning} for {Ancient} {Languages}: {A} {Survey}},\n\tissn = {0891-2017},\n\tshorttitle = {Machine {Learning} for {Ancient} {Languages}},\n\turl = {https://doi.org/10.1162/coli_a_00481},\n\tdoi = {10.1162/coli_a_00481},\n\tabstract = {Ancient languages preserve the cultures and histories of the past. 
However, their study is fraught with difficulties, and experts must tackle a range of challenging text-based tasks, from deciphering lost languages to restoring damaged inscriptions, to determining the authorship of works of literature. Technological aids have long supported the study of ancient texts, but in recent years advances in artificial intelligence and machine learning have enabled analyses on a scale and in a detail that are reshaping the field of humanities, similarly to how microscopes and telescopes have contributed to the realm of science. This article aims to provide a comprehensive survey of published research using machine learning for the study of ancient texts written in any language, script, and medium, spanning over three and a half millennia of civilizations around the ancient world. To analyze the relevant literature, we introduce a taxonomy of tasks inspired by the steps involved in the study of ancient documents: digitization, restoration, attribution, linguistic analysis, textual criticism, translation, and decipherment. This work offers three major contributions: first, mapping the interdisciplinary field carved out by the synergy between the humanities and machine learning; second, highlighting how active collaboration between specialists from both fields is key to producing impactful and compelling scholarship; third, highlighting promising directions for future work in this field. 
Thus, this work promotes and supports the continued collaborative impetus between the humanities and machine learning.},\n\turldate = {2023-09-15},\n\tjournal = {Computational Linguistics},\n\tauthor = {Sommerschield, Thea and Assael, Yannis and Pavlopoulos, John and Stefanak, Vanessa and Senior, Andrew and Dyer, Chris and Bodel, John and Prag, Jonathan and Androutsopoulos, Ion and Freitas, Nando de},\n\tmonth = aug,\n\tyear = {2023},\n\tpages = {1--45},\n}\n\n\n\n","author_short":["Sommerschield, T.","Assael, Y.","Pavlopoulos, J.","Stefanak, V.","Senior, A.","Dyer, C.","Bodel, J.","Prag, J.","Androutsopoulos, I.","Freitas, N. d."],"key":"sommerschield_machine_2023","id":"sommerschield_machine_2023","bibbaseid":"sommerschield-assael-pavlopoulos-stefanak-senior-dyer-bodel-prag-etal-machinelearningforancientlanguagesasurvey-2023","role":"author","urls":{"Paper":"https://doi.org/10.1162/coli_a_00481"},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://bibbase.org/zotero-group/schulzkx/5158478","dataSources":["JFDnASMkoQCjjGL8E"],"keywords":[],"search_terms":["machine","learning","ancient","languages","survey","sommerschield","assael","pavlopoulos","stefanak","senior","dyer","bodel","prag","androutsopoulos","freitas"],"title":"Machine Learning for Ancient Languages: A Survey","year":2023}