RIDGES Herbology: designing a diachronic multi-layer corpus. Odebrecht, C., Belz, M., Zeldes, A., Lüdeling, A., & Krause, T. Language Resources and Evaluation, 51(3):695–725, September 2017. doi: 10.1007/s10579-016-9374-3. Abstract: This paper introduces a multi-layer corpus architecture with multiple tokenizations using the open source historical, diachronic corpus of German called Register in Diachronic German Science. The corpus contains herbal texts printed between the fifteenth and nineteenth centuries and is concerned with the development of a German scientific register, independent of Latin. We will discuss difficulties of transcribing, normalizing and annotating historical texts and will thereby argue for the advantages of multiple layers and multiple tokenizations. A virtually infinite number of annotations can be added to the corpus, without the need for deciding between or discarding interpretations. Thus, this flexible architecture enables multiple normalizations and types of annotation and is open to a wide range of research questions in the humanities. We provide case studies concerning the exploitation of our different normalizations as well as structural, register-specific and linguistic annotations. The corpus architecture allows for its reuse as a resource for corpus-based research approaches. BibTeX:
@article{OdebrechtEtAl2017,
title = {{RIDGES} {Herbology}: designing a diachronic multi-layer corpus},
volume = {51},
copyright = {All rights reserved},
issn = {1574-0218},
doi = {10.1007/s10579-016-9374-3},
abstract = {This paper introduces a multi-layer corpus architecture with multiple tokenizations using the open source historical, diachronic corpus of German called Register in Diachronic German Science. The corpus contains herbal texts printed between the fifteenth and nineteenth centuries and is concerned with the development of a German scientific register, independent of Latin. We will discuss difficulties of transcribing, normalizing and annotating historical texts and will thereby argue for the advantages of multiple layers and multiple tokenizations. A virtually infinite number of annotations can be added to the corpus, without the need for deciding between or discarding interpretations. Thus, this flexible architecture enables multiple normalizations and types of annotation and is open to a wide range of research questions in the humanities. We provide case studies concerning the exploitation of our different normalizations as well as structural, register-specific and linguistic annotations. The corpus architecture allows for its reuse as a resource for corpus-based research approaches.},
number = {3},
journal = {Language Resources and Evaluation},
author = {Odebrecht, Carolin and Belz, Malte and Zeldes, Amir and Lüdeling, Anke and Krause, Thomas},
month = sep,
year = {2017},
pages = {695--725},
}
Downloads: 0
{"_id":"jKRDPNPkAptTjytRN","bibbaseid":"odebrecht-belz-zeldes-ldeling-krause-ridgesherbologydesigningadiachronicmultilayercorpus-2017","author_short":["Odebrecht, C.","Belz, M.","Zeldes, A.","Lüdeling, A.","Krause, T."],"bibdata":{"bibtype":"article","type":"article","title":"RIDGES Herbology: designing a diachronic multi-layer corpus","volume":"51","copyright":"All rights reserved","issn":"1574-0218","doi":"10.1007/s10579-016-9374-3","abstract":"This paper introduces a multi-layer corpus architecture with multiple tokenizations using the open source historical, diachronic corpus of German called Register in Diachronic German Science. The corpus contains herbal texts printed between the fifteenth and nineteenth centuries and is concerned with the development of a German scientific register, independent of Latin. We will discuss difficulties of transcribing, normalizing and annotating historical texts and will thereby argue for the advantages of multiple layers and multiple tokenizations. A virtually infinite number of annotations can be added to the corpus, without the need for deciding between or discarding interpretations. Thus, this flexible architecture enables multiple normalizations and types of annotation and is open to a wide range of research questions in the humanities. We provide case studies concerning the exploitation of our different normalizations as well as structural, register-specific and linguistic annotations. \nThe corpus architecture allows for its reuse as a resource for corpus-based research approaches.","number":"3","journal":"Language Resources and Evaluation","author":[{"propositions":[],"lastnames":["Odebrecht"],"firstnames":["Carolin"],"suffixes":[]},{"propositions":[],"lastnames":["Belz"],"firstnames":["Malte"],"suffixes":[]},{"propositions":[],"lastnames":["Zeldes"],"firstnames":["Amir"],"suffixes":[]},{"propositions":[],"lastnames":["Lüdeling"],"firstnames":["Anke"],"suffixes":[]},{"propositions":[],"lastnames":["Krause"],"firstnames":["Thomas"],"suffixes":[]}],"month":"September","year":"2017","pages":"695–725","bibtex":"@article{OdebrechtEtAl2017,\n\ttitle = {{RIDGES} {Herbology}: designing a diachronic multi-layer corpus},\n\tvolume = {51},\n\tcopyright = {All rights reserved},\n\tissn = {1574-0218},\n\tdoi = {10.1007/s10579-016-9374-3},\n\tabstract = {This paper introduces a multi-layer corpus architecture with multiple tokenizations using the open source historical, diachronic corpus of German called Register in Diachronic German Science. The corpus contains herbal texts printed between the fifteenth and nineteenth centuries and is concerned with the development of a German scientific register, independent of Latin. We will discuss difficulties of transcribing, normalizing and annotating historical texts and will thereby argue for the advantages of multiple layers and multiple tokenizations. A virtually infinite number of annotations can be added to the corpus, without the need for deciding between or discarding interpretations. Thus, this flexible architecture enables multiple normalizations and types of annotation and is open to a wide range of research questions in the humanities. We provide case studies concerning the exploitation of our different normalizations as well as structural, register-specific and linguistic annotations. \nThe corpus architecture allows for its reuse as a resource for corpus-based research approaches.},\n\tnumber = {3},\n\tjournal = {Language Resources and Evaluation},\n\tauthor = {Odebrecht, Carolin and Belz, Malte and Zeldes, Amir and Lüdeling, Anke and Krause, Thomas},\n\tmonth = sep,\n\tyear = {2017},\n\tpages = {695--725},\n}\n\n","author_short":["Odebrecht, C.","Belz, M.","Zeldes, A.","Lüdeling, A.","Krause, T."],"key":"OdebrechtEtAl2017","id":"OdebrechtEtAl2017","bibbaseid":"odebrecht-belz-zeldes-ldeling-krause-ridgesherbologydesigningadiachronicmultilayercorpus-2017","role":"author","urls":{},"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://api.zotero.org/users/5169853/collections/ZA332PCG/items?key=L84cgLKRq7Xw7mEYtooc56Yl&format=bibtex&limit=100","dataSources":["u4ivYBfvRY5aDY8x4"],"keywords":[],"search_terms":["ridges","herbology","designing","diachronic","multi","layer","corpus","odebrecht","belz","zeldes","lüdeling","krause"],"title":"RIDGES Herbology: designing a diachronic multi-layer corpus","year":2017}