The GLAUx corpus: methodological issues in designing a long-term, diverse, multi-layered corpus of Ancient Greek. Keersmaekers, A. In Proceedings of the 2nd International Workshop on Computational Approaches to Historical Language Change 2021, pages 39–50, Online, August, 2021. Association for Computational Linguistics.
Links: Paper · DOI · Abstract · BibTeX
Abstract: This paper describes the GLAUx project (“the Greek Language Automated”), an ongoing effort to develop a large long-term diachronic corpus of Greek, covering sixteen centuries of literary and non-literary material annotated with NLP methods. After providing an overview of related corpus projects and discussing the general architecture of the corpus, it zooms in on a number of larger methodological issues in the design of historical corpora. These include the encoding of textual variants, handling extralinguistic variation and annotating linguistic ambiguity. Finally, the long- and short-term perspectives of this project are discussed.
@inproceedings{keersmaekers_glaux_2021,
  author    = {Keersmaekers, Alek},
  title     = {The {GLAUx} corpus: methodological issues in designing a long-term, diverse, multi-layered corpus of {Ancient} {Greek}},
  booktitle = {Proceedings of the 2nd {International} {Workshop} on {Computational} {Approaches} to {Historical} {Language} {Change} 2021},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  month     = aug,
  year      = {2021},
  pages     = {39--50},
  doi       = {10.18653/v1/2021.lchange-1.6},
  url       = {https://aclanthology.org/2021.lchange-1.6},
  abstract  = {This paper describes the GLAUx project (“the Greek Language Automated”), an ongoing effort to develop a large long-term diachronic corpus of Greek, covering sixteen centuries of literary and non-literary material annotated with NLP methods. After providing an overview of related corpus projects and discussing the general architecture of the corpus, it zooms in on a number of larger methodological issues in the design of historical corpora. These include the encoding of textual variants, handling extralinguistic variation and annotating linguistic ambiguity. Finally, the long- and short-term perspectives of this project are discussed.},
}
Downloads: 0
{"_id":"wwSTrYHiGXNLdxENY","bibbaseid":"keersmaekers-theglauxcorpusmethodologicalissuesindesigningalongtermdiversemultilayeredcorpusofancientgreek-2021","author_short":["Keersmaekers, A."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","address":"Online","title":"The GLAUx corpus: methodological issues in designing a long-term, diverse, multi-layered corpus of Ancient Greek","url":"https://aclanthology.org/2021.lchange-1.6","doi":"10.18653/v1/2021.lchange-1.6","abstract":"This paper describes the GLAUx project (“the Greek Language Automated”), an ongoing effort to develop a large long-term diachronic corpus of Greek, covering sixteen centuries of literary and non-literary material annotated with NLP methods. After providing an overview of related corpus projects and discussing the general architecture of the corpus, it zooms in on a number of larger methodological issues in the design of historical corpora. These include the encoding of textual variants, handling extralinguistic variation and annotating linguistic ambiguity. 
Finally, the long- and short-term perspectives of this project are discussed.","booktitle":"Proceedings of the 2nd International Workshop on Computational Approaches to Historical Language Change 2021","publisher":"Association for Computational Linguistics","author":[{"propositions":[],"lastnames":["Keersmaekers"],"firstnames":["Alek"],"suffixes":[]}],"month":"August","year":"2021","pages":"39–50","bibtex":"@inproceedings{keersmaekers_glaux_2021,\n\taddress = {Online},\n\ttitle = {The {GLAUx} corpus: methodological issues in designing a long-term, diverse, multi-layered corpus of {Ancient} {Greek}},\n\turl = {https://aclanthology.org/2021.lchange-1.6},\n\tdoi = {10.18653/v1/2021.lchange-1.6},\n\tabstract = {This paper describes the GLAUx project (“the Greek Language Automated”), an ongoing effort to develop a large long-term diachronic corpus of Greek, covering sixteen centuries of literary and non-literary material annotated with NLP methods. After providing an overview of related corpus projects and discussing the general architecture of the corpus, it zooms in on a number of larger methodological issues in the design of historical corpora. These include the encoding of textual variants, handling extralinguistic variation and annotating linguistic ambiguity. 
Finally, the long- and short-term perspectives of this project are discussed.},\n\tbooktitle = {Proceedings of the 2nd {International} {Workshop} on {Computational} {Approaches} to {Historical} {Language} {Change} 2021},\n\tpublisher = {Association for Computational Linguistics},\n\tauthor = {Keersmaekers, Alek},\n\tmonth = aug,\n\tyear = {2021},\n\tpages = {39--50},\n}\n\n","author_short":["Keersmaekers, A."],"key":"keersmaekers_glaux_2021","id":"keersmaekers_glaux_2021","bibbaseid":"keersmaekers-theglauxcorpusmethodologicalissuesindesigningalongtermdiversemultilayeredcorpusofancientgreek-2021","role":"author","urls":{"Paper":"https://aclanthology.org/2021.lchange-1.6"},"metadata":{"authorlinks":{}}},"bibtype":"inproceedings","biburl":"https://api.zotero.org/users/70366/collections/SBZYWP25/items?key=7yoN7Conu0uooeF81cr44zZv&format=bibtex&limit=100","dataSources":["zDmhNkxEe4LWMorau","JFDnASMkoQCjjGL8E"],"keywords":[],"search_terms":["glaux","corpus","methodological","issues","designing","long","term","diverse","multi","layered","corpus","ancient","greek","keersmaekers"],"title":"The GLAUx corpus: methodological issues in designing a long-term, diverse, multi-layered corpus of Ancient Greek","year":2021}