This bibliography can be embedded on another web page in one of three ways.

With JavaScript:

<script src="https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Fzotero-group%2Fschulzkx%2F5158478&jsonp=1"></script>

With PHP (rendered server-side):

<?php
$contents = file_get_contents("https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Fzotero-group%2Fschulzkx%2F5158478");
echo $contents; // echo the fetched HTML directly; print_r is meant for debugging arrays
?>

With an iFrame:

<iframe src="https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Fzotero-group%2Fschulzkx%2F5158478"></iframe>
For more details see the documentation.
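The PHP option above refetches the BibBase page on every request. The following sketch adds a simple file cache so the page is fetched at most once a day; it assumes the same BibBase URL as above, and the cache file name and lifetime are illustrative choices, not part of the BibBase API.

<?php
// Minimal caching sketch: serve a locally cached copy of the BibBase page
// and refresh it at most once per day. Cache path and lifetime are assumptions.
$url = "https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Fzotero-group%2Fschulzkx%2F5158478";
$cache = __DIR__ . "/bibbase-cache.html";
$maxAge = 86400; // one day, in seconds

if (!file_exists($cache) || time() - filemtime($cache) > $maxAge) {
    $contents = @file_get_contents($url);
    if ($contents !== false) {
        file_put_contents($cache, $contents); // refresh the cache on success
    }
}
// Fall back to the last cached copy if the fetch failed.
echo file_exists($cache) ? file_get_contents($cache) : "Bibliography temporarily unavailable.";
?>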
@article{beyer_rezension_2024, series = {Latein und {Griechisch} in {Berlin} und {Brandenburg}}, title = {Rezension zu „{KI}-{Bildung} im {Lateinunterricht} – {Ein} schulpraktischer {Leitfaden}“ der {Reihe} {KI}-{Bildung} im {Ovid} {Verlag}, {Hrsg}. {Rudolf} {Henneböhl}}, volume = {68}, issn = {0945-2257}, url = {https://davbb.de/sammlung-ausgaben-lgbb/lgbb-2-2024/}, number = {2}, journal = {LGBB}, author = {Beyer, Andrea}, year = {2024}, pages = {148--152}, }
@article{beyer_ki_2024, title = {{KI} im altsprachlichen {Unterricht}}, volume = {5}, url = {https://www.biejournals.de/index.php/lgnrw/article/view/7309/6512}, doi = {10.11576/lgnrw-7309}, number = {1}, journal = {LGNRW}, author = {Beyer, Andrea}, year = {2024}, pages = {9--15}, }
@inproceedings{schulz_seflag_2024, address = {Miami, USA}, title = {{SEFLAG}: {Systematic} {Evaluation} {Framework} for {NLP} {Models} and {Datasets} in {Latin} and {Ancient} {Greek}}, shorttitle = {{SEFLAG}}, url = {https://aclanthology.org/2024.nlp4dh-1.24}, abstract = {Literary scholars of Latin and Ancient Greek increasingly use natural language processing for their work, but many models and datasets are hard to use due to a lack of sustainable research data management. This paper introduces the Systematic Evaluation Framework for natural language processing models and datasets in Latin and Ancient Greek (SEFLAG), which consistently assesses language resources using common criteria, such as specific evaluation metrics, metadata and risk analysis. The framework, a work in progress in its initial phase, currently covers lemmatization and named entity recognition for both languages, with plans for adding dependency parsing and other tasks. For increased transparency and sustainability, a thorough documentation is included as well as an integration into the HuggingFace ecosystem. The combination of these efforts is designed to support researchers in their search for suitable models.}, urldate = {2024-11-12}, booktitle = {Proceedings of the 4th {International} {Conference} on {Natural} {Language} {Processing} for {Digital} {Humanities}}, publisher = {Association for Computational Linguistics}, author = {Schulz, Konstantin and Deichsler, Florian}, editor = {Hämäläinen, Mika and Öhman, Emily and Miyagawa, So and Alnajjar, Khalid and Bizzoni, Yuri}, month = nov, year = {2024}, pages = {247--258}, }
@misc{beyer_ki_2024-1, title = {{KI} und Übersetzen – {Literaturunterricht} {Latein}}, url = {https://zenodo.org/records/14063591}, abstract = {In diesem Webinar werden vier Themenkomplexe interaktiv erarbeitet: Was ist Übersetzen? Was ist Maschinelles Übersetzen? KI und Übersetzungen literarischer Texte KI und Übersetzungshilfen}, language = {deu}, urldate = {2024-11-12}, author = {Beyer, Andrea}, month = nov, year = {2024}, doi = {10.5281/zenodo.14063591}, keywords = {AI, Latin, Machine Translation, Translation}, }
@misc{beyer_ki_2024-2, title = {{KI} im {Spracherwerb} der historischen {Sprache} {Latein}}, url = {https://zenodo.org/records/14033264}, abstract = {Der Vortrag befasst sich mit der Rolle und den Einsatzmöglichkeiten von Künstlicher Intelligenz (KI) beim Erwerb der historischen Sprache Latein. Die Präsentation gliedert sich in vier Abschnitte: Grundlagen: Es erfolgt eine Einführung in die Termini KI, Spracherwerb und Latein. Dabei werden die Besonderheiten des Lateinischen als historische Sprache und die spezifischen Herausforderungen beim Spracherwerb dargestellt. KI-Bildung (Bildungswissenschaft): Es werden verschiedene Kompetenzbereiche der KI-Bildung vorgestellt und anhand eines Fallbeispiels für den Lateinunterricht konkretisiert. Spracherwerb (Psycholinguistik): Es werden Möglichkeiten aufgezeigt, wie KI zur Modellierung des lateinischen Spracherwerbs genutzt werden kann, etwa durch den Transfer moderner Erwerbsstufenmodelle oder die Entwicklung von Personas zur Generierung synthetischer Sprachdaten. KI als Tutor (Didaktik): Es erfolgt ein knapper Überblick über den Einsatz von KI-Chatbots als Tutor und Intelligente Tutoring Systeme (ITS). Dabei werden Themen wie individualisierte Lernunterstützung, adaptives Feedback und personalisierte Lernpfade angeschnitten.}, language = {deu}, urldate = {2024-11-04}, author = {Beyer, Andrea}, month = nov, year = {2024}, doi = {10.5281/zenodo.14033264}, keywords = {AI, Historical Language, Language Acquisition, Latin}, }
@misc{beyer_daidalos-projekt_2024, title = {Daidalos-{Projekt} - {Entwicklung} einer {Infrastruktur} zum {Einsatz} von {Natural} {Language} {Processing} für {Forschende} der {Klassischen} {Philologie}}, url = {https://zenodo.org/records/12635794}, abstract = {Project proposal approved by the German Research Council as part of their Funding Programme "e-Research Technologies" : https://www.dfg.de/en/research-funding/funding-opportunities/programmes/infrastructure/lis/funding-opportunities/e-research-technologies}, language = {deu}, urldate = {2024-07-03}, author = {Beyer, Andrea and Schulz, Konstantin}, month = jul, year = {2024}, doi = {10.5281/zenodo.12635794}, note = {Publisher: Zenodo}, keywords = {Artificial intelligence, Classics, Computer and information sciences, Linguistics, Natural language processing}, }
@misc{beyer_digital_2024, title = {Digital {Literacies}}, url = {https://zenodo.org/records/10515036}, abstract = {The slides provide a brief overview of different concepts and frameworks relating to digital literacy, data literacy, and ai literacy.}, language = {deu}, urldate = {2024-10-31}, author = {Beyer, Andrea}, month = jan, year = {2024}, doi = {10.5281/zenodo.10515036}, }
@misc{beyer_ki_2024-3, title = {{KI} und {Lateinunterricht}}, url = {https://zenodo.org/records/10829822}, abstract = {Präsentationen zu einer Fortbildungsveranstaltung für Lehrkräfte in Jena, 16.3.2024 Teil 1: (generative) KI im Lateinunterricht inkl. einiger Aufgabenbeispiele und deren "Lösung" mittels KI Teil 2: Schwerpunktthema: Bewerten unter den Auspizien von KI}, language = {deu}, urldate = {2024-10-31}, author = {Beyer, Andrea}, month = mar, year = {2024}, doi = {10.5281/zenodo.10829822}, keywords = {AI Literacy, KI, KI und Prüfen, KI-Bildung, KI-Ethik, Lateinunterricht}, }
@misc{beyer_nlp-infrastruktur_2024, title = {Eine {NLP}-{Infrastruktur} für {KI}-skeptische {User}}, url = {https://zenodo.org/records/12199989}, abstract = {The talk was given at the Research Lounge hosted by the Vice President for Research of Humboldt-Universität zu Berlin.}, language = {deu}, urldate = {2024-10-31}, author = {Beyer, Andrea and Kotschka, Florian}, month = jun, year = {2024}, doi = {10.5281/zenodo.12199989}, keywords = {Classics, Computer and information sciences, Literary Studies, Literature studies, NLP}, }
@misc{beyer_can_2024, title = {Can {Jupyter} {Help} {Daidalos}? {Or}: {How} to {Develop} {Digital} {Literacies} and {Assess} {Them}?}, shorttitle = {Can {Jupyter} {Help} {Daidalos}?}, url = {https://zenodo.org/records/12200296}, abstract = {The Daidalos project is developing an infrastructure that will enable researchers in Classical Philology and related disciplines to apply various methods of natural language processing to research corpora they have built themselves. While working closely with Classical Philologists to understand their needs in terms of functionality and design of the infrastructure, we have – not unexpectedly – encountered the biggest challenge to the success of our project: How can potential users develop research questions and investigate them using Daidalos if their research expertise does not include the necessary digital skills? Or the other way round: How can we assess the level of competence of our users in order to provide them with useful and possibly personalised support?We therefore decided to address this challenge by firstly developing a domain-specific and case-sensitive competency model of digital literacies (i. e. information, data and AI literacy) and secondly by providing curated learning materials in the form of Jupyter Notebooks, among other things. These Jupyter Notebooks are widely used in teaching and assessment because they are said to provide low-threshold access to programming through their function as interactive worksheets. Although they are unknown in our own community, we wanted to offer them as a way to improve one’s digital literacies. But when implementing the first Jupyter Notebook, we encountered a familiar problem: how granular should the content be prepared and explained? Or to put it more generally: Are Jupyter Notebooks really as accessible as they claim to be?After an introduction to the project, the concept of digital literacies used, and an example of a Jupyter Notebook prepared for different proficiency levels, we will discuss how researchers can be motivated to improve their domain-specific digital literacies in order to enhance their overall research and teaching expertise. [Roundtable discussion]}, language = {eng}, urldate = {2024-10-31}, author = {Beyer, Andrea}, month = jun, year = {2024}, doi = {10.5281/zenodo.12200296}, keywords = {Classics, Digital Literacies, Digital humanities, Jupyter Notebooks}, }
@misc{beyer_daidalos_2024, title = {Daidalos: {NER} for {Literary} {Studies} on {Latin} and {Ancient} {Greek} {Texts}}, shorttitle = {Daidalos}, url = {https://zenodo.org/records/12582628}, abstract = {The talk was given at the Conference "Nomina Omina" in Leipzig.}, language = {eng}, urldate = {2024-10-31}, author = {Beyer, Andrea}, month = jun, year = {2024}, doi = {10.5281/zenodo.12582628}, keywords = {Classics, Literature studies, NER, NLP}, }
@misc{beyer_fach-_2024, title = {Fach- und fallspezifische {KI}-{Bildung} in den {Geisteswissenschaften}}, url = {https://zenodo.org/records/13757021}, abstract = {Die Präsentation „Fach- und fallspezifische KI-Bildung in den Geisteswissenschaften“ beleuchtet die Bedeutung einer gezielten KI-Ausbildung für die Geisteswissenschaften. Im Zentrum steht die Notwendigkeit, KI-Kompetenzen zu entwickeln, um den Herausforderungen der digitalen Transformation in Forschung und Lehre gerecht zu werden. Die Einführung stellt dar, warum KI-Bildung für eine informierte Auseinandersetzung mit gesellschaftlichen und wissenschaftlichen Fragen unerlässlich ist, etwa bei der Erkennung von Bias, der Nutzung von KI-Tools im Alltag oder der Analyse von Deepfakes. Es wird ein mehrstufiges Konzept der KI-Bildung vorgestellt, das sich an Lernende ohne informatische Vorkenntnisse richtet und Fach- sowie Fallbeispiele aus den Geisteswissenschaften nutzt, um den praktischen Nutzen von KI-Anwendungen zu verdeutlichen. Der Schwerpunkt liegt auf der Vermittlung von AI Literacy, Data Literacy und Digital Literacy auf verschiedenen Kompetenzstufen – von Anfänger bis Experte. Dabei wird verdeutlicht, wie diese Kompetenzen gezielt für die Analyse historischer und literarischer Fragestellungen eingesetzt werden können. Abschließend wird die Rolle der KI-Bildung sowohl für Lehrende als auch für Forschende und Studierende in den Geisteswissenschaften betont.}, language = {deu}, urldate = {2024-10-31}, author = {Beyer, Andrea and Schulz, Konstantin}, month = sep, year = {2024}, doi = {10.5281/zenodo.13757021}, keywords = {Artificial intelligence, Classics, Educational sciences, Languages and literature, Natural language processing}, }
@misc{schulz_einfuhrung_2024, title = {Einführung in {Natural} {Language} {Processing} anhand von {Plinius}' {Brief} 1,8}, url = {https://zenodo.org/records/13907150}, abstract = {Die Präsentation behandelt die methodische Analyse von Plinius' Briefen, insbesondere die Herausforderungen der Selbstdarstellung in epist. 1,8. Verschiedene linguistische und computerlinguistische Verfahren werden eingesetzt, um den Text digital zu untersuchen. Zunächst wird die Lemmatisierung eingeführt, die die Reduzierung flektierter Formen auf ihre Grundform beschreibt. Dadurch wird eine bessere Durchsuchbarkeit und Vergleichbarkeit von Texten ermöglicht. Darauf folgt das Part-of-Speech-Tagging, bei dem Wortarten im Text annotiert werden. Probleme wie die Mehrdeutigkeit und Schwierigkeiten in den Trainingsdaten des Algorithmus werden hervorgehoben. Zudem wird eine Sentimentanalyse durchgeführt, die emotionale Bewertungen und Meinungen im Text identifiziert. Ferner kommen Word Embeddings zum Einsatz, um semantische Beziehungen zwischen Wörtern darzustellen. Abschließend wird die Syntax des Plinius-Briefs mittels Treebanking, also der systematischen Erfassung syntaktischer Abhängigkeiten zwischen Wörtern in einem Satz, analysiert. Der Vortrag zeigt, wie digitale Werkzeuge klassisch-philologische Fragestellungen unterstützen und neue Erkenntnisse über Plinius' Werk ermöglichen, insbesondere in Bezug auf die Darstellung von Ruhm, Bescheidenheit und Selbstreflexion.}, language = {deu}, urldate = {2024-10-09}, author = {Schulz, Konstantin}, month = may, year = {2024}, doi = {10.5281/zenodo.13907150}, keywords = {Classics, Computational Linguistics, Languages and literature, Latin, Natural Language Processing}, }
@misc{beyer_digitale_2024, title = {Digitale {Methoden} in der {Klassischen} {Philologie}}, url = {https://zenodo.org/records/10529746}, abstract = {Folien zum Vortrag als Gastbeitrag in der Vorlesung »Grundfragen der lateinischen Literatur« an der Katholischen Universität Eichstätt-Ingolstadt}, language = {deu}, urldate = {2024-03-18}, author = {Beyer, Andrea and Schulz, Konstantin}, month = jan, year = {2024}, doi = {10.5281/zenodo.10529746}, keywords = {Classics, computational literary studies, digital humanities, natural language processing}, }
@misc{beyer_generative_2024, title = {Generative {KI} und ihre {Bedeutung} für {Bewertungskontexte}}, url = {https://zenodo.org/records/10569149}, abstract = {Präsentation zu einer Fortbildungsveranstaltung zum Thema generative KI und Leistungsmessung an einem Gymnasium in NRW, 29.1.2024.}, language = {deu}, urldate = {2024-03-18}, author = {Beyer, Andrea and Schulz, Konstantin}, month = jan, year = {2024}, doi = {10.5281/zenodo.10569149}, keywords = {AI, AI Literacy, KI Ethik, KI und Prüfungen, KI-Bildung, generative KI}, }
@misc{beyer_nlp-infrastruktur_2024-1, title = {{NLP}-{Infrastruktur} für die {Klassische} {Philologie}}, url = {https://zenodo.org/records/10474686}, abstract = {Folien zum Vortrag im Kolloquium "Phänomenologie der Digital Humanities" des Lehrstuhls für Digital Humanities an der Freien Universität Berlin: https://wikis.fu-berlin.de/display/phaenodh}, language = {deu}, urldate = {2024-03-18}, author = {Beyer, Andrea and Schulz, Konstantin}, month = jan, year = {2024}, doi = {10.5281/zenodo.10474686}, keywords = {Classics, computational literary studies, natural language processing, research infrastructure}, }
@misc{beyer_nlp-methoden_2024, title = {{NLP}-{Methoden} in der {Klassischen} {Philologie}: {Word} {Embeddings}}, shorttitle = {{NLP}-{Methoden} in der {Klassischen} {Philologie}}, url = {https://zenodo.org/records/11582358}, abstract = {Folien zu einem Vortrag}, language = {deu}, urldate = {2024-06-12}, author = {Beyer, Andrea and Schulz, Konstantin}, month = jun, year = {2024}, doi = {10.5281/zenodo.11582358}, keywords = {Artificial intelligence, Classics, Computer and information sciences, Languages and literature, Linguistics, Representation Learning, Word Embeddings}, }
@misc{schulz_kunstliche_2024, title = {Künstliche {Intelligenz} in der {Sprachverarbeitung}}, url = {https://zenodo.org/records/11190250}, abstract = {Folien zu einem Vortrag}, language = {deu}, urldate = {2024-05-14}, author = {Schulz, Konstantin}, month = may, year = {2024}, doi = {10.5281/zenodo.11190250}, keywords = {Computer and information sciences, Languages and literature, Linguistics}, }
@misc{beyer_reflexion_2024, title = {Reflexion mit und über {KI} im {AU}}, url = {https://zenodo.org/records/10909593}, abstract = {Folien zum Arbeitskreis beim Bundeskongress des Deutschen Altphilologenverbandes 2024 in Wuppertal}, language = {deu}, urldate = {2024-04-02}, author = {Beyer, Andrea and Schulz, Konstantin}, month = apr, year = {2024}, doi = {10.5281/zenodo.10909593}, keywords = {AI Literacy, Artificial intelligence, Classics, Second Language Acquisition}, }
@misc{faltin_automatisierte_2024, title = {Automatisierte {Kategorisierung} mittellateinischer {Bittbriefe} an den {Papst}}, url = {https://zenodo.org/records/13628818}, abstract = {Die Präsentation ist im Rahmen der 27. Aquilonia an der Christian-Albrechts-Universität zu Kiel entstanden.}, urldate = {2024-09-04}, author = {Faltin, Nico and Schulz, Konstantin}, month = jul, year = {2024}, doi = {10.5281/zenodo.13628818}, }
@misc{schulz_seflag_2024-1, title = {{SEFLAG}. {Systematic} {Evaluation} {Framework} for {NLP} {Models} and {Datasets} in {Latin} and {Ancient} {Greek}}, url = {https://zenodo.org/records/14012948}, abstract = {Das SEFLAG-Framework, präsentiert von Konstantin Schulz und Florian Deichsler (Humboldt-Universität zu Berlin), stellt ein systematisches Evaluationsframework für NLP-Modelle und -Datensätze für Latein und Altgriechisch vor. Die Hauptziele sind die Bewertung und Dokumentation bestehender NLP-Ressourcen, die Auswahl passender Modelle für Forschungsvorhaben sowie die Förderung der Interoperabilität durch standardisierte Annotationsrichtlinien. Diese Initiative reagiert auf den steigenden Einsatz von NLP in der Literaturwissenschaft historischer Sprachen und die damit verbundenen Herausforderungen, wie die Diversität an Modellen, die fehlende zentrale Verwaltung von Ressourcen und die Notwendigkeit einer systematischen Evaluation. SEFLAG konzentriert sich zunächst auf Lemmatisierung und Named Entity Recognition (NER) und plant zukünftig die Erweiterung um weitere NLP-Tasks. Die Ergebnisse werden auf Plattformen wie Hugging Face veröffentlicht und zielen darauf ab, der Forschungsgemeinschaft Zeit und Ressourcen zu sparen. Durch Dokumentation und Benchmarking wird eine nachhaltige Infrastruktur geschaffen, die Forschenden in den Bereichen Philologie, Geschichte und Archäologie zugutekommt und Innovationen im Bereich der historischen NLP-Methoden fördert.}, language = {deu}, urldate = {2024-10-31}, author = {Schulz, Konstantin}, month = oct, year = {2024}, doi = {10.5281/zenodo.14012948}, keywords = {Artificial intelligence, Computer and information sciences, Languages and literature, Linguistics, Natural language processing}, }
@misc{beyer_genki_2024, title = {{GenKI} im {Lateinunterricht} – {Texte} im {Spracherwerb}}, url = {https://zenodo.org/records/13902476}, abstract = {Generative KI (genKI) im Lateinunterricht (LU) beschäftigt nicht nur die einzelnen Lehrenden, sondern auch die Fortbildungsplaner. In diesem Vortrag wird eine sehr knappe Einführung zum Begriff generative KI und Prompting geboten. Kern der Fortbildung sind Anwendungsszenarien von genKI bei der Textarbeit im Lateinunterricht. Von besonderem Interesse sind die lateinischen, didaktisierten Texte im Rahmen des Spracherwerbs.}, language = {deu}, urldate = {2024-10-31}, author = {Beyer, Andrea}, month = oct, year = {2024}, doi = {10.5281/zenodo.13902476}, keywords = {Latin classes, generative AI, language acquisition}, }
@article{kuehnast_development_2024, title = {Development of basic reading skills in {Latin}: a corpus-based tool for computer-assisted fluency training}, volume = {11}, issn = {2331-186X}, shorttitle = {Development of basic reading skills in {Latin}}, url = {https://doi.org/10.1080/2331186X.2024.2416819}, doi = {10.1080/2331186X.2024.2416819}, abstract = {The present paper evaluates the processes of reading acquisition in Latin from the component-skills approach and discusses how advances in reading in modern foreign languages could be adapted to the specific needs of Latin as a historical language. Compared to the holistic and socially embedded approaches to modern foreign language acquisition, the grammar-translation method traditionally used in schools shows considerable weaknesses in the development of basic reading skills in Latin. Therefore, we address the possible advantages of corpus-based teaching strategies and present Machina Callida, a psycholinguistically informed e-tutor suitable for supporting Latin vocabulary acquisition and reading comprehension at beginner and intermediate levels. Using digital corpora of original Latin texts, the application semi-automatically generates contextualized vocabulary exercises tailored to the needs of different groups of learners. Through its integration with the research data repository Zenodo, Machina Callida supports online collaboration in the creation and distribution of open educational resources through crowdsourcing.}, number = {1}, urldate = {2024-10-22}, journal = {Cogent Education}, author = {Kuehnast, Milena and Schulz, Konstantin and Lüdeling, Anke}, month = dec, year = {2024}, note = {Publisher: Cogent OA \_eprint: https://doi.org/10.1080/2331186X.2024.2416819}, keywords = {CALL, Classical Language \& Literature, Classroom Practice, Databases, Language \& Linguistics, Language Teaching \& Learning, Latin, Open \& Distance Education and eLearning, Teaching \& Learning - Education, corpus resources, reading comprehension, vocabulary acquisition}, pages = {2416819}, }
@misc{sun_lalaeval_2024, title = {{LalaEval}: {A} {Holistic} {Human} {Evaluation} {Framework} for {Domain}-{Specific} {Large} {Language} {Models}}, shorttitle = {{LalaEval}}, url = {http://arxiv.org/abs/2408.13338}, doi = {10.48550/arXiv.2408.13338}, abstract = {This paper introduces LalaEval, a holistic framework designed for the human evaluation of domain-specific large language models (LLMs). LalaEval proposes a comprehensive suite of end-to-end protocols that cover five main components including domain specification, criteria establishment, benchmark dataset creation, construction of evaluation rubrics, and thorough analysis and interpretation of evaluation outcomes. This initiative aims to fill a crucial research gap by providing a systematic methodology for conducting standardized human evaluations within specific domains, a practice that, despite its widespread application, lacks substantial coverage in the literature and human evaluation are often criticized to be less reliable due to subjective factors, so standardized procedures adapted to the nuanced requirements of specific domains or even individual organizations are in great need. Furthermore, the paper demonstrates the framework's application within the logistics industry, presenting domain-specific evaluation benchmarks, datasets, and a comparative analysis of LLMs for the logistics domain use, highlighting the framework's capacity to elucidate performance differences and guide model selection and development for domain-specific LLMs. Through real-world deployment, the paper underscores the framework's effectiveness in advancing the field of domain-specific LLM evaluation, thereby contributing significantly to the ongoing discussion on LLMs' practical utility and performance in domain-specific applications.}, urldate = {2024-09-03}, publisher = {arXiv}, author = {Sun, Chongyan and Lin, Ken and Wang, Shiwei and Wu, Hulong and Fu, Chengfei and Wang, Zhen}, month = aug, year = {2024}, note = {arXiv:2408.13338 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Human-Computer Interaction}, }
@misc{liang_whats_2024, title = {What's documented in {AI}? {Systematic} {Analysis} of {32K} {AI} {Model} {Cards}}, shorttitle = {What's documented in {AI}?}, url = {http://arxiv.org/abs/2402.05160}, doi = {10.48550/arXiv.2402.05160}, abstract = {The rapid proliferation of AI models has underscored the importance of thorough documentation, as it enables users to understand, trust, and effectively utilize these models in various applications. Although developers are encouraged to produce model cards, it's not clear how much information or what information these cards contain. In this study, we conduct a comprehensive analysis of 32,111 AI model documentations on Hugging Face, a leading platform for distributing and deploying AI models. Our investigation sheds light on the prevailing model card documentation practices. Most of the AI models with substantial downloads provide model cards, though the cards have uneven informativeness. We find that sections addressing environmental impact, limitations, and evaluation exhibit the lowest filled-out rates, while the training section is the most consistently filled-out. We analyze the content of each section to characterize practitioners' priorities. Interestingly, there are substantial discussions of data, sometimes with equal or even greater emphasis than the model itself. To evaluate the impact of model cards, we conducted an intervention study by adding detailed model cards to 42 popular models which had no or sparse model cards previously. We find that adding model cards is moderately correlated with an increase weekly download rates. Our study opens up a new perspective for analyzing community norms and practices for model documentation through large-scale data science and linguistics analysis.}, urldate = {2024-09-03}, publisher = {arXiv}, author = {Liang, Weixin and Rajani, Nazneen and Yang, Xinyu and Ozoani, Ezinwanne and Wu, Eric and Chen, Yiqun and Smith, Daniel Scott and Zou, James}, month = feb, year = {2024}, note = {arXiv:2402.05160 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Computer Science - Software Engineering}, }
@misc{liu_automatic_2024, title = {Automatic {Generation} of {Model} and {Data} {Cards}: {A} {Step} {Towards} {Responsible} {AI}}, shorttitle = {Automatic {Generation} of {Model} and {Data} {Cards}}, url = {https://arxiv.org/abs/2405.06258v2}, abstract = {In an era of model and data proliferation in machine learning/AI especially marked by the rapid advancement of open-sourced technologies, there arises a critical need for standardized consistent documentation. Our work addresses the information incompleteness in current human-generated model and data cards. We propose an automated generation approach using Large Language Models (LLMs). Our key contributions include the establishment of CardBench, a comprehensive dataset aggregated from over 4.8k model cards and 1.4k data cards, coupled with the development of the CardGen pipeline comprising a two-step retrieval process. Our approach exhibits enhanced completeness, objectivity, and faithfulness in generated model and data cards, a significant step in responsible AI documentation practices ensuring better accountability and traceability.}, language = {en}, urldate = {2024-09-03}, journal = {arXiv.org}, author = {Liu, Jiarui and Li, Wenkai and Jin, Zhijing and Diab, Mona}, month = may, year = {2024}, }
@inproceedings{martinelli_exploring_2024, title = {Exploring {Neural} {Topic} {Modeling} on a {Classical} {Latin} {Corpus}}, booktitle = {Proceedings of the 2024 {Joint} {International} {Conference} on {Computational} {Linguistics}, {Language} {Resources} and {Evaluation} ({LREC}-{COLING} 2024)}, author = {Martinelli, Ginevra and Impicciché, Paola and Fersini, Elisabetta and Mambrini, Francesco and Passarotti, Marco}, year = {2024}, pages = {6929--6934}, }
@article{stopponi_agree_2024, title = {{AGREE}: a new benchmark for the evaluation of distributional semantic models of ancient {Greek}}, shorttitle = {{AGREE}}, url = {https://research.rug.nl/en/publications/agree-a-new-benchmark-for-the-evaluation-of-distributional-semant}, urldate = {2024-04-05}, journal = {Digital Scholarship in the Humanities}, author = {Stopponi, Silvia and Peels-Matthey, Saskia and Nissim, Malvina}, year = {2024}, note = {Publisher: Oxford University Press}, }
@article{stopponi_natural_2024, title = {Natural {Language} {Processing} for {Ancient} {Greek}: {Design}, advantages and challenges of language models}, issn = {0176-4225}, journal = {Diachronica}, author = {Stopponi, Silvia and Pedrazzini, Nilo and Peels-Matthey, Saskia and McGillivray, Barbara and Nissim, Malvina}, year = {2024}, note = {Publisher: John Benjamins Publishing Company Amsterdam/Philadelphia}, }
@misc{noauthor_digital_2024, type = {online resource}, title = {Digital {Tools} for {Learning} {Ancient} {Greek} and {Latin} and {Guiding} {Phrases} for {Using} {Generative} {AI} in {Ancient} {Language} {Study}}, url = {https://figshare.com/articles/online_resource/Digital_Tools_for_Learning_Ancient_Greek_and_Latin_and_Guiding_Phrases_for_Using_Generative_AI_in_Ancient_Language_Study/25391782/3}, abstract = {This document is a short introductory guide to the digital tools available for supporting the study of Ancient Greek and Latin. The first part of this guide is a list of our preferred digital tools for supporting Ancient Greek and Latin learning. This list is not exhaustive, but it does include a variety of generative AI tools and their ideal uses for supporting ancient language learning. The second part of this guide is a series of pre-prepared prompts which can be copy-pasted into a conversational AI tool to guide the conversation towards your expected learning level. Before using generative AI to support your studies, make sure to take a look at some of our instructional videos about the ethics of using generative AI. \#STOPandTHINKbeforeyouGENERATE}, language = {en}, urldate = {2024-05-21}, journal = {figshare}, month = mar, year = {2024}, doi = {10.6084/m9.figshare.25391782.v3}, note = {Publisher: figshare}, }
@inproceedings{sprugnoli_overview_2024, title = {Overview of the {EvaLatin} 2024 evaluation campaign}, url = {https://aclanthology.org/2024.lt4hala-1.21.pdf}, booktitle = {Proceedings of the {Third} {Workshop} on {Language} {Technologies} for {Historical} and {Ancient} {Languages} ({LT4HALA}) @ {LREC}-{COLING}-2024}, author = {Sprugnoli, Rachele and Iurescia, Federica and Passarotti, Marco}, year = {2024}, pages = {190--197}, }
@misc{krause_graphannis_2024, title = {{graphANNIS}}, copyright = {Apache-2.0}, url = {https://github.com/korpling/graphANNIS}, abstract = {This is a new backend implementation of the ANNIS linguistic search and visualization system.}, urldate = {2024-02-27}, author = {Krause, Thomas}, month = feb, year = {2024}, doi = {10.5281/zenodo.2598164}, }
@inproceedings{stussi_part--speech_2024, title = {Part-of-{Speech} {Tagging} of 16th-{Century} {Latin} with {GPT}}, url = {https://aclanthology.org/2024.latechclfl-1.18.pdf}, abstract = {Part-of-speech tagging is foundational to natural language processing, transcending mere linguistic functions. However, taggers optimized for Classical Latin struggle when faced with diverse linguistic eras shaped by the language’s evolution. Exploring 16th-century Latin from the correspondence and assessing five Latin treebanks, we focused on carefully evaluating tagger accuracy and refining Large Language Models for improved performance in this nuanced linguistic context. Our discoveries unveiled the competitive accuracies of different versions of GPT, particularly after fine-tuning. Notably, our best fine-tuned model soared to an average accuracy of 88.99\% over the treebank data, underscoring the remarkable adaptability and learning capabilities when fine-tuned to the specific intricacies of Latin texts. Next to emphasising GPT’s part-of-speech tagging capabilities, our second aim is to strengthen taggers’ adaptability across different periods. We establish solid groundwork for using Large Language Models in specific natural language processing tasks where part-of-speech tagging is often employed as a pre-processing step. This work significantly advances the use of modern language models in interpreting historical language, bridging the gap between past linguistic epochs and modern computational linguistics.}, language = {en}, booktitle = {Proceedings of the 8th {Joint} {SIGHUM} {Workshop} on {Computational} {Linguistics} for {Cultural} {Heritage}, {Social} {Sciences}, {Humanities} and {Literature} ({LaTeCH}-{CLfL} 2024)}, author = {Stüssi, Elina and Ströbel, Phillip Benjamin}, year = {2024}, pages = {196--206}, }
@article{rebora_sentiment_2023, title = {Sentiment {Analysis} in {Literary} {Studies}. {A} {Critical} {Survey}}, volume = {17}, url = {https://digitalhumanities.org/dhq/vol/17/2/000691/000691.html}, number = {2}, journal = {Digital Humanities Quarterly}, author = {Rebora, Simone}, year = {2023}, }
@article{sprugnoli_sentiment_2023, title = {The {Sentiment} of {Latin} {Poetry}. {Annotation} and {Automatic} {Analysis} of the {Odes} of {Horace}}, volume = {9}, issn = {2499-4553}, url = {https://journals.openedition.org/ijcol/1125}, number = {9-1}, journal = {IJCoL. Italian Journal of Computational Linguistics}, author = {Sprugnoli, Rachele and Mambrini, Francesco and Passarotti, Marco and Moretti, Giovanni}, year = {2023}, note = {Publisher: Accademia University Press}, pages = {53--71}, }
@misc{beyer_digitalgestutzte_2023, title = {Digitalgestützte {Textanalyse} in {Forschung} und {Lehre}}, url = {https://zenodo.org/records/8388745}, abstract = {Folien zum Vortrag bei der Konferenz "(Digitale) Chancen für den Lateinunterricht": https://www.altphil.uni-freiburg.de/termine/digilat2023}, language = {deu}, urldate = {2024-10-31}, author = {Beyer, Andrea}, month = sep, year = {2023}, doi = {10.5281/zenodo.8388745}, keywords = {Digital Classics, Digital Humanities, Latin language, language learning}, }
@misc{beyer_chatbot_2023, address = {Berlin, Germany}, title = {@chatbot: warum kannst du latein et quo vadis?}, shorttitle = {@chatbot}, url = {https://zenodo.org/record/8412875}, abstract = {Folien zum Vortrag bei der Fortbildung "@chatbot: warum kannst du latein et quo vadis?": http://www.davbb.de/veranstaltungen-fortbildungen/248-chatbot}, language = {deu}, urldate = {2023-10-06}, author = {Beyer, Andrea and Schulz, Konstantin}, month = oct, year = {2023}, doi = {10.5281/zenodo.8412875}, keywords = {Latin language, artificial intelligence, artificial intelligence literacy, language learning}, }
@inproceedings{beyer_daidalos_2023, address = {Berlin}, title = {{DAIdalos}: {Forschen} und {Lernen} zugleich?}, isbn = {978-3-88579-731-9}, shorttitle = {{DAIdalos}}, url = {https://dl.gi.de/handle/20.500.12116/43162}, doi = {10.18420/inf2023_42}, abstract = {Die Daidalos-Infrastruktur soll es Forschenden der Klassischen Philologie und verwandter Disziplinen ermöglichen, verschiedene Methoden des Natural Language Processing an selbst zusammengestellten Forschungskorpora anzuwenden. Dabei ist Daidalos als interaktive Lern- und Forschungsinfrastruktur konzipiert, die den Ausbau wesentlicher Teilfähigkeiten von Data Literacy, z. B. die Zusammenstellung und Analyse von Korpora oder den Umgang mit Annotationen, TEI-XML und graphischen Auswertungen, unterstützt.}, language = {de}, urldate = {2023-12-14}, publisher = {Gesellschaft für Informatik e.V.}, author = {Beyer, Andrea and Schulz, Konstantin}, year = {2023}, pages = {391--393}, }
@misc{beyer_daidalos_2023-1, title = {Daidalos: {Forschen} und {Lernen} zugleich? {Data} {Literacy} als {Lernaufgabe} für die {Klassisch}-philologische {Forschung}}, shorttitle = {Daidalos}, url = {https://zenodo.org/record/8388900}, abstract = {Poster zur Präsentation des Daidalos-Projekts beim "Workshop KI-Bildung. Ein Workshop zu Aus- und Weiterbildung über Künstliche Intelligenz im Rahmen der GI-Tagung INFORMATIK 2023" an der HTW Berlin}, language = {deu}, urldate = {2023-09-29}, author = {Beyer, Andrea and Schulz, Konstantin}, month = sep, year = {2023}, doi = {10.5281/zenodo.8388900}, keywords = {Digital Classics, data literacy, natural language processing, research infrastructure}, }
@misc{beyer_data_2023, address = {Tübingen, Germany}, title = {Data {Literacy} für die {Klassische} {Philologie}: {dAIdalos} – eine interaktive {Infrastruktur} als {Lernangebot}}, shorttitle = {Data {Literacy} für die {Klassische} {Philologie}}, url = {https://zenodo.org/record/8420565}, doi = {10.5281/zenodo.8420565}, abstract = {Abstract für das gleichnamige Poster bei der Konferenz "FORGE 2023 - Forschungsdaten in den Geisteswissenschaften: Anything Goes?! Forschungsdaten in den Geisteswissenschaften - kritisch betrachtet": https://forge23.uni-tuebingen.de/}, language = {deu}, urldate = {2023-10-09}, publisher = {Zenodo}, author = {Beyer, Andrea and Schulz, Konstantin}, month = sep, year = {2023}, keywords = {Digital Classics, data literacy, open educational resources, research infrastructure}, }
@misc{beyer_ki-bildung_2023, address = {Bielefeld, Germany}, title = {{KI}-{Bildung}: {Was}, warum und wie?}, shorttitle = {{KI}-{Bildung}}, url = {https://zenodo.org/record/8381513}, abstract = {Folien zum Vortrag bei der Fachtagung "Zwischen Wachstafel und ChatGPT – KI im Lateinunterricht": https://www.uni-bielefeld.de/fakultaeten/linguistik-literaturwissenschaft/studium-lehre/faecher/latein/projekte/fachtagung-ki-im-lateinun/index.xml}, language = {deu}, urldate = {2023-09-29}, author = {Beyer, Andrea and Schulz, Konstantin}, month = sep, year = {2023}, doi = {10.5281/zenodo.8381513}, keywords = {Latin language, artificial intelligence, artificial intelligence literacy, language learning}, }
@misc{beyer_mit_2023, address = {Bielefeld, Germany}, title = {Mit und über {KI}-{Tools} im {Literaturunterricht} reflektieren}, url = {https://zenodo.org/record/8388817}, abstract = {Folien zum Workshop bei der Fachtagung "Zwischen Wachstafel und ChatGPT – KI im Lateinunterricht": https://www.uni-bielefeld.de/fakultaeten/linguistik-literaturwissenschaft/studium-lehre/faecher/latein/projekte/fachtagung-ki-im-lateinun/index.xml}, language = {deu}, urldate = {2023-09-29}, author = {Beyer, Andrea and Schulz, Konstantin}, month = sep, year = {2023}, doi = {10.5281/zenodo.8388817}, keywords = {Latin language, artificial intelligence, artificial intelligence literacy, language learning}, }
@misc{beyer_data_2023-1, title = {Data {Literacy} für die {Klassische} {Philologie} - {dAIdalos} - eine interaktive {Infrastruktur} als {Lernangebot}}, url = {https://zenodo.org/record/8392485}, abstract = {Das Poster informiert über das DFG-geförderte explorative Entwicklungsvorhaben Daidalos, das es Forschenden der Klassischen Philologie und verwandter Disziplinen ermöglichen soll, verschiedene Methoden des Natural Language Processing (NLP) an selbst zusammengestellten Forschungskorpora anzuwenden. Dabei ist Daidalos als interaktive Forschungsinfrastruktur konzipiert, die zugleich den Ausbau wesentlicher Teilfähigkeiten von Data Literacy, z. B. die Zusammenstellung und Analyse von Korpora oder den Umgang mit Annotationen, TEI-XML und graphischen Auswertungen, unterstützt. Hierzu sind vor allem forschungsorientierte, didaktische Lernbausteine und deren Implementierung in die Infrastruktur angedacht, um ein fach- und forschungsbezogenes Lernen zu ermöglichen.}, urldate = {2023-10-09}, author = {Beyer, Andrea and Schulz, Konstantin}, month = sep, year = {2023}, doi = {10.5281/zenodo.8392485}, keywords = {Data Literacy, FORGE2023, JupyterLab, KI-Didaktik, NLP}, }
@article{hambarde_information_2023, title = {Information {Retrieval}: {Recent} {Advances} and {Beyond}}, volume = {11}, issn = {2169-3536}, shorttitle = {Information {Retrieval}}, url = {http://arxiv.org/abs/2301.08801}, doi = {10.1109/ACCESS.2023.3295776}, abstract = {In this paper, we provide a detailed overview of the models used for information retrieval in the first and second stages of the typical processing chain. We discuss the current state-of-the-art models, including methods based on terms, semantic retrieval, and neural. Additionally, we delve into the key topics related to the learning process of these models. This way, this survey offers a comprehensive understanding of the field and is of interest for for researchers and practitioners entering/working in the information retrieval domain.}, urldate = {2024-09-03}, journal = {IEEE Access}, author = {Hambarde, Kailash A. and Proenca, Hugo}, year = {2023}, note = {arXiv:2301.08801 [cs]}, keywords = {Computer Science - Information Retrieval}, pages = {76581--76604}, }
@inproceedings{celano_neural_2023, title = {A {Neural} {Network} {Approach} to {Ellipsis} {Detection} in {Ancient} {Greek}}, booktitle = {Proceedings of the {Ancient} {Language} {Processing} {Workshop}}, author = {Celano, Giuseppe G. A.}, year = {2023}, pages = {151--158}, }
@inproceedings{berti_named_2023, title = {Named {Entity} {Recognition} for a {Text}-{Based} {Catalog} of {Ancient} {Greek} {Authors} and {Works}}, url = {https://www.academia.edu/download/107756648/BERTI_Monica_Named_Entity_Recognition_for_a_Text_Based_Catal.pdf}, author = {Berti, Monica}, year = {2023}, }
@article{ross_new_2023, title = {A {New} {Frontier}: {AI} and {Ancient} {Language} {Pedagogy}}, volume = {24}, issn = {2058-6310}, shorttitle = {A {New} {Frontier}}, url = {https://www.cambridge.org/core/journals/journal-of-classics-teaching/article/new-frontier-ai-and-ancient-language-pedagogy/A63EF69F5FE5529F0F45FB1EB655A9F7}, doi = {10.1017/S2058631023000430}, abstract = {In November 2022, ChatGPT 3.5 was released on a public research preview, gaining notoriety for its ability to pull from a vast body of information to create coherent and digestible bodies of text that accurately respond to queries (OpenAI, 2022). It is able to recognise the grammar and vocabulary of ancient languages, translate passages, and compose texts at an alarmingly accurate and rapid rate. For teachers, this AI has had mixed reviews. Some fear its ability to produce well-written work effortlessly, while others are excited by its abilities to push the boundaries of current teaching practices. This paper explores how well ChatGPT explains grammatical concepts, parses inflected forms, and translates Classical Latin, Ancient Greek, and Classical Sanskrit. Overall, ChatGPT is rather good at working with Classical Latin and Sanskrit, but its abilities with Ancient Greek are deeply problematic. Although it is quite flawed at this time, ChatGPT, when used properly, could become a useful a tool for ancient language study. With proper guiding phrases, students could use this AI to practise vocabulary, check their translations, and rephrase grammatical concepts.}, language = {en}, number = {48}, urldate = {2024-03-26}, journal = {Journal of Classics Teaching}, author = {Ross, Edward A. S.}, month = oct, year = {2023}, keywords = {Ancient Greek, Ancient Language Pedagogy, Artificial Intelligence, ChatGPT, Classical Latin, Classical Sanskrit, New Teaching Tools}, pages = {143--161}, }
@article{krahn_sentence_2023, title = {Sentence embedding models for {Ancient} {Greek} using multilingual knowledge distillation}, journal = {arXiv preprint arXiv:2308.13116}, author = {Krahn, Kevin and Tate, Derrick and Lamicela, Andrew C}, year = {2023}, }
@article{sansom_sedes_2023, title = {{SEDES}: {Metrical} {Position} in {Greek} {Hexameter}}, volume = {017}, issn = {1938-4122}, shorttitle = {{SEDES}}, url = {https://digitalhumanities.org/dhq/vol/17/2/000675/000675.html}, number = {2}, journal = {Digital Humanities Quarterly}, author = {Sansom, Stephen A. and Fifield, David}, month = may, year = {2023}, }
@article{stopponi_evaluation_2023, title = {Evaluation of {Distributional} {Semantic} {Models} of {Ancient} {Greek}: {Ancient} {Language} {Processing}}, shorttitle = {Evaluation of {Distributional} {Semantic} {Models} of {Ancient} {Greek}}, url = {https://pure.rug.nl/ws/portalfiles/portal/777728242/stopponi_et_al_2023_alp.pdf}, abstract = {We evaluate four count-based and predictive distributional semantic models of Ancient Greek against AGREE, a composite benchmark of human judgements, to assess their ability to retrieve semantic relatedness. On the basis of the observations deriving from the analysis of the results, we design a procedure for a largerscale intrinsic evaluation of count-based and predictive language models, including syntactic embeddings. We also propose possible ways of exploiting the different layers of the whole AGREE benchmark (including both humanand machine-generated data) and different evaluation metrics.}, journal = {Proceedings of the Ancient Language Processing Workshop}, author = {Stopponi, Silvia and Pedrazzini, Nilo and Peels-Matthey, Saskia and McGillivray, Barbara and Nissim, Malvina}, month = sep, year = {2023}, note = {Place: Varna, Bulgaria Publisher: Association for Computational Linguistics (ACL)}, keywords = {ancient greek, ancient languages, benchmark, evaluation, languge models, natural language processing, word2vec}, pages = {49--58}, }
@misc{wang_gpt-ner_2023, title = {{GPT}-{NER}: {Named} {Entity} {Recognition} via {Large} {Language} {Models}}, shorttitle = {{GPT}-{NER}}, url = {http://arxiv.org/abs/2304.10428}, doi = {10.48550/arXiv.2304.10428}, abstract = {Despite the fact that large-scale Language Models (LLM) have achieved SOTA performances on a variety of NLP tasks, its performance on NER is still significantly below supervised baselines. This is due to the gap between the two tasks the NER and LLMs: the former is a sequence labeling task in nature while the latter is a text-generation model. In this paper, we propose GPT-NER to resolve this issue. GPT-NER bridges the gap by transforming the sequence labeling task to a generation task that can be easily adapted by LLMs e.g., the task of finding location entities in the input text "Columbus is a city" is transformed to generate the text sequence "@@Columbus\#\# is a city", where special tokens @@\#\# marks the entity to extract. To efficiently address the "hallucination" issue of LLMs, where LLMs have a strong inclination to over-confidently label NULL inputs as entities, we propose a self-verification strategy by prompting LLMs to ask itself whether the extracted entities belong to a labeled entity tag. We conduct experiments on five widely adopted NER datasets, and GPT-NER achieves comparable performances to fully supervised baselines, which is the first time as far as we are concerned. More importantly, we find that GPT-NER exhibits a greater ability in the low-resource and few-shot setups, when the amount of training data is extremely scarce, GPT-NER performs significantly better than supervised models. This demonstrates the capabilities of GPT-NER in real-world NER applications where the number of labeled examples is limited.}, urldate = {2024-06-21}, publisher = {arXiv}, author = {Wang, Shuhe and Sun, Xiaofei and Li, Xiaoya and Ouyang, Rongbin and Wu, Fei and Zhang, Tianwei and Li, Jiwei and Wang, Guoyin}, month = oct, year = {2023}, note = {arXiv:2304.10428 [cs]}, keywords = {Computer Science - Computation and Language}, }
@misc{myerston_grecy_2023, title = {{greCy}: {Ancient} {Greek} {spaCy} models for {Natural} {Language} {Processing} in {Python}}, copyright = {MIT}, shorttitle = {{greCy}}, url = {https://github.com/jmyerston/greCy}, abstract = {Ancient Greek language models for spaCy}, urldate = {2024-02-27}, author = {Myerston, Jacobo and López, Jose}, month = dec, year = {2023}, note = {original-date: 2022-09-18T23:13:41Z}, }
@inproceedings{yousef_transformer-based_2023, address = {Graz}, title = {Transformer-{Based} {Named} {Entity} {Recognition} for {Ancient} {Greek}}, url = {https://www.researchgate.net/publication/372250671_Transformer-based_Named_Entity_Recognition_for_Ancient_Greek_DH2023_Graz}, doi = {10.5281/zenodo.8107629}, abstract = {This paper presents our work on training two automatic NER models for ancient Greek using transformer-based models. The models classify the entities into three categories, namely, Person, Location, and Miscellaneous and achieved promising results on test and evaluation datasets.}, language = {eng}, urldate = {2024-01-04}, booktitle = {Book of {Abstracts}}, publisher = {Zenodo}, author = {Yousef, Tariq and Palladino, Chiara and Jänicke, Stefan}, month = jun, year = {2023}, keywords = {Ancient Greek, Computer science, Cultural studies, Humanities computing, Linguistics, Long Presentation, Named Entities Recognition, Paper, Transformer models, and methods, annotation structures, natural language processing, systems}, pages = {420--422}, }
@article{sommerschield_machine_2023, title = {Machine {Learning} for {Ancient} {Languages}: {A} {Survey}}, issn = {0891-2017}, shorttitle = {Machine {Learning} for {Ancient} {Languages}}, url = {https://doi.org/10.1162/coli_a_00481}, doi = {10.1162/coli_a_00481}, abstract = {Ancient languages preserve the cultures and histories of the past. However, their study is fraught with difficulties, and experts must tackle a range of challenging text-based tasks, from deciphering lost languages to restoring damaged inscriptions, to determining the authorship of works of literature. Technological aids have long supported the study of ancient texts, but in recent years advances in artificial intelligence and machine learning have enabled analyses on a scale and in a detail that are reshaping the field of humanities, similarly to how microscopes and telescopes have contributed to the realm of science. This article aims to provide a comprehensive survey of published research using machine learning for the study of ancient texts written in any language, script, and medium, spanning over three and a half millennia of civilizations around the ancient world. To analyze the relevant literature, we introduce a taxonomy of tasks inspired by the steps involved in the study of ancient documents: digitization, restoration, attribution, linguistic analysis, textual criticism, translation, and decipherment. This work offers three major contributions: first, mapping the interdisciplinary field carved out by the synergy between the humanities and machine learning; second, highlighting how active collaboration between specialists from both fields is key to producing impactful and compelling scholarship; third, highlighting promising directions for future work in this field. Thus, this work promotes and supports the continued collaborative impetus between the humanities and machine learning.}, urldate = {2023-09-15}, journal = {Computational Linguistics}, author = {Sommerschield, Thea and Assael, Yannis and Pavlopoulos, John and Stefanak, Vanessa and Senior, Andrew and Dyer, Chris and Bodel, John and Prag, Jonathan and Androutsopoulos, Ion and Freitas, Nando de}, month = aug, year = {2023}, pages = {1--45}, }
@inproceedings{beersmans_training_2023, address = {Varna, Bulgaria}, title = {Training and {Evaluation} of {Named} {Entity} {Recognition} {Models} for {Classical} {Latin}}, url = {https://aclanthology.org/2023.alp-1.1/}, abstract = {We evaluate the performance of various models on the task of named entity recognition (NER) for classical Latin. Using an existing dataset, we train two transformer-based LatinBERT models and one shallow conditional random field (CRF) model. The performance is assessed using both standard metrics and a detailed manual error analysis, and compared to the results obtained by different already released Latin NER tools. Both analyses demonstrate that the BERT models achieve a better f1-score than the other models. Furthermore, we annotate new, unseen data for further evaluation of the models, and we discuss the impact of annotation choices on the results.}, language = {English}, booktitle = {Proceedings of the {Ancient} {Language} {Processing} {Workshop}}, publisher = {INCOMA Ltd.}, author = {Beersmans, Marijke and de Graaf, Evelien and Van de Cruys, Tim and Fantoli, Margherita}, year = {2023}, pages = {1--12}, }
@article{du_shortcut_2023, title = {Shortcut {Learning} of {Large} {Language} {Models} in {Natural} {Language} {Understanding}}, volume = {67}, issn = {0001-0782}, url = {https://dl.acm.org/doi/10.1145/3596490}, doi = {10.1145/3596490}, abstract = {Shortcuts often hinder the robustness of large language models.}, number = {1}, urldate = {2024-05-01}, journal = {Communications of the ACM}, author = {Du, Mengnan and He, Fengxiang and Zou, Na and Tao, Dacheng and Hu, Xia}, year = {2023}, pages = {110--120}, }
@inproceedings{yousef_classical_2023, title = {Classical {Philology} in the {Time} of {AI}: {Exploring} the {Potential} of {Parallel} {Corpora} in {Ancient} {Languages}}, shorttitle = {Classical {Philology} in the {Time} of {AI}}, url = {https://www.researchgate.net/profile/Chiara-Palladino/publication/373638720_Classical_Philology_in_the_Time_of_AI_Exploring_the_Potential_of_Parallel_Corpora_in_Ancient_Languages/links/64f49e0bfa851147de0fa850/Classical-Philology-in-the-Time-of-AI-Exploring-the-Potential-of-Parallel-Corpora-in-Ancient-Languages.pdf}, abstract = {This contribution presents an overview of Parallel Text Processing, particularly Translation Alignment, and illustrates the current status of this task in ancient languages. In the first part, we provide the fundamental principles of Parallel Texts and give an overview of their applications for the study of ancient texts. In the second part, we indicate how Parallel Texts can be leveraged to perform other NLP tasks, including automatic alignment, dynamic lexica induction, and Named Entity Recognition. In the conclusion, we emphasize current limitations and future work.}, author = {Yousef, Tariq and Palladino, Chiara and Shamsian, Farnoosh}, month = sep, year = {2023}, }
@misc{riemenschneider_exploring_2023, title = {Exploring {Large} {Language} {Models} for {Classical} {Philology}}, url = {http://arxiv.org/abs/2305.13698}, doi = {10.48550/arXiv.2305.13698}, abstract = {Recent advances in NLP have led to the creation of powerful language models for many languages including Ancient Greek and Latin. While prior work on Classical languages unanimously uses BERT, in this work we create four language models for Ancient Greek that vary along two dimensions to study their versatility for tasks of interest for Classical languages: we explore (i) encoder-only and encoder-decoder architectures using RoBERTa and T5 as strong model types, and create for each of them (ii) a monolingual Ancient Greek and a multilingual instance that includes Latin and English. We evaluate all models on morphological and syntactic tasks, including lemmatization, which demonstrates the added value of T5's decoding abilities. We further define two probing tasks to investigate the knowledge acquired by models pre-trained on Classical texts. Our experiments provide the first benchmarking analysis of existing models of Ancient Greek. Results show that our models provide significant improvements over the SoTA. The systematic analysis of model types can inform future research in designing language models for Classical languages, including the development of novel generative tasks. We make all our models available as community resources, along with a large curated pre-training corpus for Ancient Greek, to support the creation of a larger, comparable model zoo for Classical Philology. Our models and resources are available at https://github.com/Heidelberg-NLP/ancient-language-models.}, urldate = {2023-09-29}, publisher = {arXiv}, author = {Riemenschneider, Frederick and Frank, Anette}, month = may, year = {2023}, note = {arXiv:2305.13698 [cs]}, keywords = {Computer Science - Computation and Language, I.2.7}, }
@article{burns_latincy_2023, title = {{LatinCy}: {Synthetic} {Trained} {Pipelines} for {Latin} {NLP}}, url = {https://arxiv.org/pdf/2305.04365.pdf}, journal = {arXiv preprint arXiv:2305.04365}, author = {Burns, Patrick J}, year = {2023}, }
@techreport{salden_didaktische_2023, title = {Didaktische und rechtliche {Perspektiven} auf {KI}-gestütztes {Schreiben} in der {Hochschulbildung}}, institution = {Zentrum für Wissenschaftsdidaktik der Ruhr-Universität Bochum}, author = {Salden, Peter and Leschke, Jonas}, year = {2023}, }
@incollection{hartman_quantitative_2023, title = {Quantitative {Approaches} to {Late} {Antique} {Poetics}: {Enumeration} and {Congeries}}, isbn = {1350346411}, booktitle = {A {Late} {Antique} {Poetics}?: {The} {Jeweled} {Style} {Revisited}}, publisher = {Bloomsbury Publishing}, author = {Hartman, Joshua and Levernier, Jacob}, year = {2023}, pages = {75}, }
@inproceedings{warstadt_findings_2023, address = {Singapore}, title = {Findings of the {BabyLM} {Challenge}: {Sample}-{Efficient} {Pretraining} on {Developmentally} {Plausible} {Corpora}}, shorttitle = {Findings of the {BabyLM} {Challenge}}, url = {https://aclanthology.org/2023.conll-babylm.1}, doi = {10.18653/v1/2023.conll-babylm.1}, abstract = {Children can acquire language from less than 100 million words of input. Large language models are far less data-efficient: they typically require 3 or 4 orders of magnitude more data and still do not perform as well as humans on many evaluations. These intensive resource demands limit the ability of researchers to train new models and use existing models as developmentally plausible cognitive models. The BabyLM Challenge is a communal effort in which participants compete to optimize language model training on a fixed data budget. Submissions are compared on various evaluation tasks targeting grammatical ability, downstream task performance, and generalization. Participants can submit to up to three tracks with progressively looser data restrictions. From over 30 submissions, we extract concrete recommendations on how best to train data-efficient language models, and on where future efforts should (and perhaps should not) focus. The winning submissions using the LTG-BERT architecture (Samuel et al., 2023) outperformed models trained on trillions of words. Other submissions achieved strong results through training on shorter input sequences or training a student model on a pretrained teacher. Curriculum learning attempts, which accounted for a large number of submissions, were largely unsuccessful, though some showed modest improvements.}, language = {en}, urldate = {2024-01-16}, booktitle = {Proceedings of the {BabyLM} {Challenge} at the 27th {Conference} on {Computational} {Natural} {Language} {Learning}}, publisher = {Association for Computational Linguistics}, author = {Warstadt, Alex and Mueller, Aaron and Choshen, Leshem and Wilcox, Ethan and Zhuang, Chengxu and Ciro, Juan and Mosquera, Rafael and Paranjabe, Bhargavi and Williams, Adina and Linzen, Tal and Cotterell, Ryan}, year = {2023}, pages = {1--6}, }
@inproceedings{kostkan_odycy_2023, address = {Dubrovnik, Croatia}, title = {{OdyCy} – {A} general-purpose {NLP} pipeline for {Ancient} {Greek}}, booktitle = {Proceedings of the 7th {Joint} {SIGHUM} {Workshop} on {Computational} {Linguistics} for {Cultural} {Heritage}, {Social} {Sciences}, {Humanities} and {Literature}}, publisher = {Association for Computational Linguistics}, author = {Kostkan, Jan and Kardos, Márton and Palle Bliddal Mortensen, Jacob and Laigaard Nielbo, Kristofer}, year = {2023}, pages = {128--134}, }
@inproceedings{gamba_latin_2023, address = {Varna, Bulgaria}, title = {Latin {Morphology} through the {Centuries}: {Ensuring} {Consistency} for {Better} {Language} {Processing}}, url = {https://ufal.mff.cuni.cz/biblio/attachments/2023-gamba-p3787387064232511302.pdf}, booktitle = {Proceedings of the {Ancient} {Language} {Processing} {Workshop} associated with the 14th {International} {Conference} on {Recent} {Advances} in {Natural} {Language} {Processing} {RANLP} 2023}, author = {Gamba, Federica and Zeman, Daniel}, year = {2023}, }
@inproceedings{gamba_universalising_2023, address = {Washington, DC, USA}, title = {Universalising {Latin} {Universal} {Dependencies}: a harmonisation of {Latin} treebanks in {UD}}, url = {https://aclanthology.org/2023.udw-1.2/}, booktitle = {Proceedings of the {Sixth} {Workshop} on {Universal} {Dependencies} ({UDW}, {GURT}/{SyntaxFest} 2023)}, publisher = {Association for Computational Linguistics}, author = {Gamba, Federica and Zeman, Daniel}, month = mar, year = {2023}, }
@article{grillo_meta-literature_2023, title = {Meta-{Literature} and {Mimesis} in the {Rhetorica} ad {Herennium}}, volume = {144}, number = {1}, journal = {American Journal of Philology}, author = {Grillo, Luca}, year = {2023}, pages = {41--72}, }
@misc{riemenschneider_graecia_2023, title = {Graecia capta ferum victorem cepit. {Detecting} {Latin} {Allusions} to {Ancient} {Greek} {Literature}}, url = {http://arxiv.org/abs/2308.12008}, doi = {10.48550/arXiv.2308.12008}, abstract = {Intertextual allusions hold a pivotal role in Classical Philology, with Latin authors frequently referencing Ancient Greek texts. Until now, the automatic identification of these intertextual references has been constrained to monolingual approaches, seeking parallels solely within Latin or Greek texts. In this study, we introduce SPhilBERTa, a trilingual Sentence-RoBERTa model tailored for Classical Philology, which excels at cross-lingual semantic comprehension and identification of identical sentences across Ancient Greek, Latin, and English. We generate new training data by automatically translating English texts into Ancient Greek. Further, we present a case study, demonstrating SPhilBERTa's capability to facilitate automated detection of intertextual parallels. Our models and resources are available at https://github.com/Heidelberg-NLP/ancient-language-models.}, urldate = {2023-09-29}, publisher = {arXiv}, author = {Riemenschneider, Frederick and Frank, Anette}, month = aug, year = {2023}, note = {arXiv:2308.12008 [cs]}, keywords = {Computer Science - Computation and Language, I.2.7}, }
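As a rough illustration of how a cross-lingual sentence model like the SPhilBERTa described above can be queried: a minimal sketch using the sentence-transformers library. The checkpoint id "bowphs/SPhilBerta" is an assumption based on the authors' public releases and should be checked against the repository linked in the abstract.

<pre><code>
# Hedged sketch: cross-lingual sentence similarity in the spirit of
# SPhilBERTa. The model id below is an assumption; substitute the
# authors' actual release if it differs.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("bowphs/SPhilBerta")  # assumed model id
sentences = ["arma virumque cano", "ἄνδρα μοι ἔννεπε, Μοῦσα, πολύτροπον"]
embeddings = model.encode(sentences, convert_to_tensor=True)
print(util.cos_sim(embeddings[0], embeddings[1]).item())
</code></pre>

A high cosine score between a Latin and a Greek sentence is what the paper exploits to flag candidate intertextual parallels.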
@article{krahn_sentence_2023, title = {Sentence {Embedding} {Models} for {Ancient} {Greek} {Using} {Multilingual} {Knowledge} {Distillation}}, url = {https://arxiv.org/pdf/2308.13116.pdf}, journal = {arXiv preprint arXiv:2308.13116}, author = {Krahn, Kevin and Tate, Derrick and Lamicela, Andrew C}, year = {2023}, }
@article{van_der_lek_integrating_2023, title = {Integrating research infrastructures into teaching: {Recommendations} and best practices}, shorttitle = {Integrating research infrastructures into teaching}, url = {https://zenodo.org/record/8114407}, abstract = {The UPSKILLS needs revealed that linguistics and language-related degree programmes seldom include language data standards and research data repositories in their learning outcomes. A survey of lecturers from linguistics and language-related disciplines also exposed a number of challenges in using repositories for language data discovery, reuse and archiving. Against this backdrop, the present guide shows how teachers and trainers can leverage the CLARIN research infrastructure to help students enhance their data collection, processing and analysis, and archiving skills. By integrating research infrastructures into teaching, educators can bridge the gap between theoretical knowledge and practical aspects of linguistic research data management, equipping students with the necessary skills and competences to thrive in the evolving landscape of open science and data-driven research.}, urldate = {2023-10-02}, author = {van der Lek, Iulianna and Fišer, Darja and Samardzic, Tanja and Simonovic, Marko and Assimakopoulos, Stavros and Bernardini, Silvia and Milicevic Petrovic, Maja and Puskas, Genoveva}, month = aug, year = {2023}, note = {Publisher: Zenodo}, }
@misc{schork_kunstliche_2023, title = {Künstliche {Intelligenz} in der {Bildung}: {Drei} {Zukunftsszenarien} und fünf {Handlungsfelder}}, url = {https://ki-campus.org/sites/default/files/2023-04/2023-03_Diskussionspapier_KI_Bildung_Zukunftsszenarien_Handlungsfelder_KI-Campus.pdf}, abstract = {Die voranschreitende Digitalisierung und insbesondere der Einsatz von Künstlicher Intelligenz (KI) in der Bildung eröffnen neue Möglichkeiten des Lernens. Bildung kann stärker individuell sowie zeitlich und räumlich entgrenzt stattfinden. ChatGPT verdeutlicht, wie dynamisch die Entwicklungen im Bereich KI sind. Das in Zunahme begriffene Interesse am Thema KI allgemein sowie KI in der Bildung kommt dem erforderlichen Austausch zwischen zentralen Akteur:innen zugute. Dieses Diskussionspapier präsentiert drei Zukunftsszenarien für den Einsatz von KI in der institutionellen Bildung: Hochschule, Weiterbildung und Schule. Im Fokus stehen die übergeordneten Fragestellungen nach Voraussetzungen und Gelingensbedingungen einer erfolgreichen Anwendung sowie nach gemeinsamen Handlungsfeldern für zentrale Stakeholder. Fünf Handlungsfelder für den Einsatz von KI in institutioneller Bildung sind auf Grundlage der skizzierten Zukunftsszenarien hervorzuheben: (1) Interdisziplinäre Zusammenarbeit, (2) Qualifizierungsangebote und Kompetenzentwicklung, (3) Digitale Infrastruktur und Personal, (4) Ethik und Datensouveränität sowie (5) Interoperabilität von Daten in Bildungskontexten. Ziel des vorliegenden Diskussionspapiers ist es, den kooperativen Austausch zwischen relevanten Stakeholdern anzuregen.}, author = {Schork, Sabrina and Schleiss, Johannes and Mah, Dana-Kristin and Böhme, Katrin and Fischer, David and Mesenhöller, Janne and Paaßen, Benjamin and Schrumpf, Johannes}, year = {2023}, }
@article{motoki_more_2023, title = {More human than human: measuring {ChatGPT} political bias}, issn = {1573-7101}, shorttitle = {More human than human}, url = {https://doi.org/10.1007/s11127-023-01097-2}, doi = {10.1007/s11127-023-01097-2}, abstract = {We investigate the political bias of a large language model (LLM), ChatGPT, which has become popular for retrieving factual information and generating content. Although ChatGPT assures that it is impartial, the literature suggests that LLMs exhibit bias involving race, gender, religion, and political orientation. Political bias in LLMs can have adverse political and electoral consequences similar to bias from traditional and social media. Moreover, political bias can be harder to detect and eradicate than gender or racial bias. We propose a novel empirical design to infer whether ChatGPT has political biases by requesting it to impersonate someone from a given side of the political spectrum and comparing these answers with its default. We also propose dose-response, placebo, and profession-politics alignment robustness tests. To reduce concerns about the randomness of the generated text, we collect answers to the same questions 100 times, with question order randomized on each round. We find robust evidence that ChatGPT presents a significant and systematic political bias toward the Democrats in the US, Lula in Brazil, and the Labour Party in the UK. These results translate into real concerns that ChatGPT, and LLMs in general, can extend or even amplify the existing challenges involving political processes posed by the Internet and social media. Our findings have important implications for policymakers, media, politics, and academia stakeholders.}, language = {en}, urldate = {2023-09-15}, journal = {Public Choice}, author = {Motoki, Fabio and Neto, Valdemar Pinho and Rodrigues, Victor}, month = aug, year = {2023}, keywords = {Bias, C10, C89, ChatGPT, D83, L86, Large language models, Political bias, Z00}, }
@misc{gallegos_bias_2023, title = {Bias and {Fairness} in {Large} {Language} {Models}: {A} {Survey}}, shorttitle = {Bias and {Fairness} in {Large} {Language} {Models}}, url = {http://arxiv.org/abs/2309.00770}, doi = {10.48550/arXiv.2309.00770}, abstract = {Rapid advancements of large language models (LLMs) have enabled the processing, understanding, and generation of human-like text, with increasing integration into systems that touch our social sphere. Despite this success, these models can learn, perpetuate, and amplify harmful social biases. In this paper, we present a comprehensive survey of bias evaluation and mitigation techniques for LLMs. We first consolidate, formalize, and expand notions of social bias and fairness in natural language processing, defining distinct facets of harm and introducing several desiderata to operationalize fairness for LLMs. We then unify the literature by proposing three intuitive taxonomies, two for bias evaluation, namely metrics and datasets, and one for mitigation. Our first taxonomy of metrics for bias evaluation disambiguates the relationship between metrics and evaluation datasets, and organizes metrics by the different levels at which they operate in a model: embeddings, probabilities, and generated text. Our second taxonomy of datasets for bias evaluation categorizes datasets by their structure as counterfactual inputs or prompts, and identifies the targeted harms and social groups; we also release a consolidation of publicly-available datasets for improved access. Our third taxonomy of techniques for bias mitigation classifies methods by their intervention during pre-processing, in-training, intra-processing, and post-processing, with granular subcategories that elucidate research trends. Finally, we identify open problems and challenges for future work. Synthesizing a wide range of recent research, we aim to provide a clear guide of the existing literature that empowers researchers and practitioners to better understand and prevent the propagation of bias in LLMs.}, urldate = {2023-09-15}, publisher = {arXiv}, author = {Gallegos, Isabel O. and Rossi, Ryan A. and Barrow, Joe and Tanjim, Md Mehrab and Kim, Sungchul and Dernoncourt, Franck and Yu, Tong and Zhang, Ruiyi and Ahmed, Nesreen K.}, month = sep, year = {2023}, note = {arXiv:2309.00770 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Computers and Society, Computer Science - Machine Learning}, }
@incollection{bewersdorff_tum-digillab_2023, address = {Wiesbaden}, series = {Edition {Fachdidaktiken}}, title = {Das {TUM}-{DigiLLab}: {Lehr}-{Lernraum} sowie {Forschungs}- und {Entwicklungsumgebung} zur {Förderung} digitaler {Kompetenzen}}, isbn = {978-3-658-40109-2}, shorttitle = {Das {TUM}-{DigiLLab}}, url = {https://doi.org/10.1007/978-3-658-40109-2_10}, abstract = {Das Digitale Lehr-Lern-Labor der Technischen Universität München (TUM-DigiLLab) soll als Ort der Entwicklung, Durchführung und Beforschung von Lehr-Lernkonzepten in authentischen Anwendungskontexten helfen die Lücke zwischen theoretischer Wissensvermittlung an der Universität und der Wissensanwendung in lebensweltlichen oder professionellen Zusammenhängen der Praxis zu schließen. Einen didaktischen Schwerpunkt bildet die Entwicklung innovativer Lehr-Lernkonzepte im Themenfeld der Künstlichen Intelligenz.}, language = {de}, urldate = {2023-09-15}, booktitle = {Lehr-{Lern}-{Labore} und {Digitalisierung}}, publisher = {Springer Fachmedien}, author = {Bewersdorff, Arne and Nerdel, Claudia}, editor = {Meier, Monique and Greefrath, Gilbert and Hammann, Marcus and Wodzinski, Rita and Ziepprecht, Kathrin}, year = {2023}, doi = {10.1007/978-3-658-40109-2_10}, keywords = {Augmented Reality, Digitalisierung, Künstliche Intelligenz, Lehr-Lern-Labore, Technologiegestützte Kooperation}, pages = {137--141}, }
@article{bewersdorff_myths_2023, title = {Myths, mis- and preconceptions of artificial intelligence: {A} review of the literature}, volume = {4}, issn = {2666-920X}, shorttitle = {Myths, mis- and preconceptions of artificial intelligence}, url = {https://www.sciencedirect.com/science/article/pii/S2666920X2300022X}, doi = {10.1016/j.caeai.2023.100143}, abstract = {Artificial Intelligence (AI) is prevalent in nearly every aspect of our lives. However, recent studies have found a significant amount of confusion and misunderstanding surrounding AI. To develop effective educational programs in the field of AI, it is vital to examine and understand learners' pre- and misconceptions as well as myths about AI. This study examined a corpus of 591 studies. 25 relevant studies were identified by applying the following eligibility criteria: English-written original empirical research on education and AI and reporting AI conceptions in a formal learning context. The review found studies from six continents, with the majority conducted in Europe and North America. The studies predominantly focus on the school and university levels. Findings reveal a range of preconceptions, misconceptions, and myths about AI, such as: Learners often have limited understanding of AI on a technical level. They tend to attribute human-like characteristics or attributes to AI systems and may have narrow views of AI's scope, capabilities, and limitations. The review also shows that learners often have binary and unspecific views about the threats, dangers, and benefits of AI. Effective educational programs are key to empower learners' understanding of AI, thus helping them make informed decisions about the integration of AI in our society, rather than being swayed by misinformation and unnecessary fear. This review may help inform the development of more effective teaching and outreach strategies in AI education.}, urldate = {2023-09-15}, journal = {Computers and Education: Artificial Intelligence}, author = {Bewersdorff, Arne and Zhai, Xiaoming and Roberts, Jessica and Nerdel, Claudia}, month = jan, year = {2023}, keywords = {Artificial intelligence, Misconceptions, Preconceptions, Review}, pages = {100143}, }
@book{hose_formen_2023, address = {Stuttgart}, title = {Formen und {Funktionen} griechisch-römischer {Literatur}: {Aufsätze} zur {Literaturgeschichte} und {Literaturgeschichtsschreibung}}, isbn = {978-3-515-13411-8}, shorttitle = {Formen und {Funktionen} griechisch-römischer {Literatur}}, url = {https://d-nb.info/1273972600/04}, publisher = {Franz Steiner Verlag}, author = {Hose, Martin}, editor = {Peri, Annamaria and Thum, Tobias}, year = {2023}, }
@book{baker_chatgpt_2023, title = {{ChatGPT} für {Dummies}}, isbn = {978-3-527-84473-9}, abstract = {Profitieren auch Sie von den wunderbaren Fähigkeiten von ChatGPT. Pam Baker erklärt Ihnen, wie ChatGPT funktioniert und wie Sie den Chatbot gewinnbringend einsetzen - sei es bei der Texterstellung für Werbezwecke, der Kundenbetreuung auf einer Webseite oder für die Beantwortung all jener Fragen, auf die Sie bisher keine Antwort gefunden haben. Sie lernen die Stärken und Schwächen des Tools kennen. So können Sie besser einschätzen, wo es Ihnen nutzt und wo Sie besser weiter arbeiten wie bisher. Erschließen Sie das Potenzial von ChatGPT!}, language = {de}, publisher = {John Wiley \& Sons}, author = {Baker, Pam}, month = aug, year = {2023}, note = {Google-Books-ID: dEbREAAAQBAJ}, keywords = {Computers / Artificial Intelligence / General, Computers / Computer Science, Computers / Information Technology}, }
@misc{zhao_survey_2023, title = {A {Survey} of {Large} {Language} {Models}}, url = {http://arxiv.org/abs/2303.18223}, abstract = {Ever since the Turing Test was proposed in the 1950s, humans have explored the mastering of language intelligence by machine. Language is essentially a complex, intricate system of human expressions governed by grammatical rules. It poses a significant challenge to develop capable artificial intelligence (AI) algorithms for comprehending and grasping a language. As a major approach, language modeling has been widely studied for language understanding and generation in the past two decades, evolving from statistical language models to neural language models. Recently, pre-trained language models (PLMs) have been proposed by pretraining Transformer models over large-scale corpora, showing strong capabilities in solving various natural language processing (NLP) tasks. Since the researchers have found that model scaling can lead to an improved model capacity, they further investigate the scaling effect by increasing the parameter scale to an even larger size. Interestingly, when the parameter scale exceeds a certain level, these enlarged language models not only achieve a significant performance improvement, but also exhibit some special abilities (e.g., incontext learning) that are not present in small-scale language models (e.g., BERT). To discriminate the language models in different parameter scales, the research community has coined the term large language models (LLM) for the PLMs of significant size (e.g., containing tens or hundreds of billions of parameters). Recently, the research on LLMs has been largely advanced by both academia and industry, and a remarkable progress is the launch of ChatGPT (a powerful AI chatbot developed based on LLMs), which has attracted widespread attention from society. The technical evolution of LLMs has been making an important impact on the entire AI community, which would revolutionize the way how we develop and use AI algorithms. Considering this rapid technical progress, in this survey, we review the recent advances of LLMs by introducing the background, key findings, and mainstream techniques. In particular, we focus on four major aspects of LLMs, namely pre-training, adaptation tuning, utilization, and capacity evaluation. Furthermore, we also summarize the available resources for developing LLMs and discuss the remaining issues for future directions. This survey provides an up-to-date review of the literature on LLMs, which can be a useful resource for both researchers and engineers.}, language = {en}, urldate = {2023-09-14}, publisher = {arXiv}, author = {Zhao, Wayne Xin and Zhou, Kun and Li, Junyi and Tang, Tianyi and Wang, Xiaolei and Hou, Yupeng and Min, Yingqian and Zhang, Beichen and Zhang, Junjie and Dong, Zican and Du, Yifan and Yang, Chen and Chen, Yushuo and Chen, Zhipeng and Jiang, Jinhao and Ren, Ruiyang and Li, Yifan and Tang, Xinyu and Liu, Zikang and Liu, Peiyu and Nie, Jian-Yun and Wen, Ji-Rong}, month = sep, year = {2023}, note = {arXiv:2303.18223 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language}, }
@article{linka_pain_2023, title = {Pain in {Classical} {Greek} {Texts}}, copyright = {Copyright (c) 2023}, issn = {2364-7957}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/93792}, doi = {10.11588/dco.2023.9.93792}, abstract = {Texte aus der klassischen griechischen Periode spielen eine entscheidende Rolle in der historischen Entwicklung der westlichen Wissenschaft und Philosophie. Das Konzept des Schmerzes ist vor allem für zwei Bereiche des menschlichen Wissens, nämlich Medizin und Ethik, von zentraler Bedeutung. Obwohl der Begriff des Schmerzes für beide Bereiche wichtig ist, werden sie in der Wissenschaft meist getrennt voneinander untersucht. Wir betrachten sie gemeinsam, im Kontext der gesamten erhaltenen klassischen griechischen Literatur. Dies wird durch unseren methodischen Ansatz ermöglicht, der traditionelle Interpretationsansätze mit computergestützten Textanalysemethoden kombiniert und so die Untersuchung einer großen Menge von Textdaten ermöglicht. Wenn wir den Kontext der Verwendung einzelner Wörter, die Schmerz bezeichnen, in den Texten verschiedener Gattungen oder Themen betrachten, können wir relativ stabile semantische Cluster identifizieren, auf die auf Schmerz hindeuten, wie etwa Pathologien, Emotionen oder Moral. Auf diese Weise sind wir in der Lage, die Rolle bestimmter Schmerzwörter, ihre Bedeutung und ihre wechselseitigen Beziehungen in klassischen griechischen Texten zu erfassen. Unser Ansatz ermöglicht es uns auch, die Rolle verschiedener textueller Subkorpora (philosophisch, medizinisch) für die Art und Weise zu erkennen, wie Schmerz in klassischen griechischen Texten aufgefasst wurde.}, language = {en}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Linka, Vojtěch and Kaše, Vojtěch}, month = apr, year = {2023}, keywords = {Semantische Analyse}, pages = {1--14}, }
@misc{burns_latincy_2023, title = {{LatinCy}: {Synthetic} {Trained} {Pipelines} for {Latin} {NLP}}, shorttitle = {{LatinCy}}, url = {http://arxiv.org/abs/2305.04365}, doi = {10.48550/arXiv.2305.04365}, abstract = {This paper introduces LatinCy, a set of trained general purpose Latin-language "core" pipelines for use with the spaCy natural language processing framework. The models are trained on a large amount of available Latin data, including all five of the Latin Universal Dependency treebanks, which have been preprocessed to be compatible with each other. The result is a set of general models for Latin with good performance on a number of natural language processing tasks (e.g. the top-performing model yields POS tagging, 97.41\% accuracy; lemmatization, 94.66\% accuracy; morphological tagging 92.76\% accuracy). The paper describes the model training, including its training data and parameterization, and presents the advantages to Latin-language researchers of having a spaCy model available for NLP work.}, urldate = {2023-07-16}, publisher = {arXiv}, author = {Burns, Patrick J.}, month = may, year = {2023}, note = {arXiv:2305.04365 [cs] version: 1}, keywords = {Computer Science - Computation and Language}, }
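For orientation, a minimal sketch of how the spaCy pipelines described in this entry are meant to be used. It assumes the "la_core_web_sm" package named in the paper has already been installed alongside spaCy.

<pre><code>
# Minimal usage sketch for a LatinCy pipeline (assumes the
# "la_core_web_sm" model named in the paper is installed).
import spacy

nlp = spacy.load("la_core_web_sm")
doc = nlp("Gallia est omnis divisa in partes tres.")
for token in doc:
    # POS tagging, lemmatization and morphology, the tasks evaluated in the paper
    print(token.text, token.lemma_, token.pos_, token.morph)
</code></pre>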
@inproceedings{sprugnoli_sentiment_2022, title = {Sentiment {Analysis} of {Latin} {Poetry}: {First} {Experiments} on the {Odes} of {Horace}}, url = {https://books.openedition.org/aaccademia/10854}, booktitle = {Proceedings of the {Eighth} {Italian} {Conference} on {Computational} {Linguistics} {CliC}-{It} 2021}, author = {Sprugnoli, Rachele and Mambrini, Francesco and Passarotti, Marco and Moretti, Giovanni}, year = {2022}, }
@inproceedings{yamshchikov_bert_2022, title = {{BERT} in {Plutarch}'s {Shadows}}, url = {https://aclanthology.org/2022.emnlp-main.407.pdf}, doi = {10.18653/v1/2022.emnlp-main.407}, booktitle = {Proceedings of the 2022 {Conference} on {Empirical} {Methods} in {Natural} {Language} {Processing}}, publisher = {Association for Computational Linguistics}, author = {Yamshchikov, Ivan P and Tikhonov, Alexey and Pantis, Yorgos and Schubert, Charlotte and Jost, Jürgen}, year = {2022}, pages = {6071--6080}, }
@article{assael_restoring_2022, title = {Restoring and attributing ancient texts using deep neural networks}, volume = {603}, copyright = {2022 The Author(s)}, issn = {1476-4687}, url = {https://www.nature.com/articles/s41586-022-04448-z}, doi = {10.1038/s41586-022-04448-z}, abstract = {Ancient history relies on disciplines such as epigraphy—the study of inscribed texts known as inscriptions—for evidence of the thought, language, society and history of past civilizations1. However, over the centuries, many inscriptions have been damaged to the point of illegibility, transported far from their original location and their date of writing is steeped in uncertainty. Here we present Ithaca, a deep neural network for the textual restoration, geographical attribution and chronological attribution of ancient Greek inscriptions. Ithaca is designed to assist and expand the historian’s workflow. The architecture of Ithaca focuses on collaboration, decision support and interpretability. While Ithaca alone achieves 62\% accuracy when restoring damaged texts, the use of Ithaca by historians improved their accuracy from 25\% to 72\%, confirming the synergistic effect of this research tool. Ithaca can attribute inscriptions to their original location with an accuracy of 71\% and can date them to less than 30 years of their ground-truth ranges, redating key texts of Classical Athens and contributing to topical debates in ancient history. This research shows how models such as Ithaca can unlock the cooperative potential between artificial intelligence and historians, transformationally impacting the way that we study and write about one of the most important periods in human history.}, language = {en}, number = {7900}, urldate = {2023-04-26}, journal = {Nature}, author = {Assael, Yannis and Sommerschield, Thea and Shillingford, Brendan and Bordbar, Mahyar and Pavlopoulos, John and Chatzipanagiotou, Marita and Androutsopoulos, Ion and Prag, Jonathan and de Freitas, Nando}, month = mar, year = {2022}, note = {Number: 7900 Publisher: Nature Publishing Group}, keywords = {Archaeology, Computer science, History}, pages = {280--283}, }
@article{mcgillivray_new_2022, title = {A {New} {Corpus} {Annotation} {Framework} for {Latin} {Diachronic} {Lexical} {Semantics}}, volume = {21}, doi = {10.1515/joll-2022-2007}, number = {1}, journal = {Journal of {Latin} {Linguistics}}, author = {McGillivray, Barbara and Kondakova, Daria and Burman, Annie and Dell’Oro, Francesca and Bermúdez Sabel, Helena and Marongiu, Paola and Cruz, Manuel Márquez}, year = {2022}, keywords = {Latin lexical semantics, LatinISE corpus, annotation, semantic change}, pages = {47--105}, }
@inproceedings{pavlopoulos_sentiment_2022, address = {Marseille, France}, title = {Sentiment {Analysis} of {Homeric} {Text}: {The} 1st {Book} of {Iliad}}, url = {https://aclanthology.org/2022.lrec-1.765.pdf}, booktitle = {Proceedings of the {Thirteenth} {Language} {Resources} and {Evaluation} {Conference} ({LREC} 2022)}, publisher = {European Language Resources Association}, author = {Pavlopoulos, John and Xenos, Alexandros and Picca, Davide}, year = {2022}, pages = {7071--7077}, }
@inproceedings{de_graaf_agile_2022, address = {Marseille, France}, title = {{AGILe}: {The} {First} {Lemmatizer} for {Ancient} {Greek} {Inscriptions}}, booktitle = {Proceedings of the 13th {Conference} on {Language} {Resources} and {Evaluation} ({LREC} 2022)}, author = {de Graaf, E. and Stopponi, S. and Bos, J. and Peels-Matthey, S. and Nissim, M.}, year = {2022}, pages = {5334--5344}, }
@article{prieto_espinosa_corpus_2022, title = {El {Corpus} {Documentale} {Latinum} {Hispaniarum} ({CODOLHisp}), una plataforma digital d’accés conjunt per a l’estudi del llatí medieval hispànic}, url = {https://raco.cat/index.php/LlenguaLiteratura/article/view/399457}, journal = {Llengua i literatura : revista anual de la Societat Catalana de Llengua i Literatura}, author = {Prieto Espinosa, Carlos}, year = {2022}, pages = {204--207}, }
@article{forstall_towards_2022, title = {Towards a {Linked} {Open} {Data} {Resource} for {Direct} {Speech} {Acts} in {Greek} and {Latin} {Epic}}, volume = {37}, doi = {10.1093/llc/fqac006}, number = {4}, journal = {Digital {Scholarship} in the {Humanities}}, author = {Forstall, Christopher W. and Finkmann, Simone and Verhelst, Berenice}, year = {2022}, pages = {972--981}, }
@phdthesis{regnault_annotation_2022, address = {Paris, France}, title = {Annotation et analyse syntaxique de corpus hétérogènes : le cas du français médiéval}, url = {https://hal-lirmm.ccsd.cnrs.fr/AO-LINGUISTIQUE/tel-04069848v1}, school = {Université de la Sorbonne Nouvelle (Paris III)}, author = {Regnault, Mathilde}, year = {2022}, }
@article{kenty_irony_2022, title = {Irony and {Figured} {Language} in {Cicero}’s {Letter} to {Lucceius}}, volume = {118}, language = {English}, number = {1}, journal = {Classical Journal}, author = {Kenty, Joanna}, year = {2022}, pages = {50--89}, }
@inproceedings{sprugnoli_overview_2022, address = {Marseille, France}, title = {Overview of the {EvaLatin} 2022 {Evaluation} {Campaign}}, url = {https://aclanthology.org/2022.lt4hala-1.29}, abstract = {This paper describes the organization and the results of the second edition of EvaLatin, the campaign for the evaluation of Natural Language Processing tools for Latin. The three shared tasks proposed in EvaLatin 2022, i.e., Lemmatization, Part-of-Speech Tagging and Features Identification, are aimed to foster research in the field of language technologies for Classical languages. The shared dataset consists of texts mainly taken from the LASLA corpus. More specifically, the training set includes only prose texts of the Classical period, whereas the test set is organized in three sub-tasks: a Classical sub-task on a prose text of an author not included in the training data, a Cross-genre sub-task on poetic and scientific texts, and a Cross-time sub-task on a text of the 15th century. The results obtained by the participants for each task and sub-task are presented and discussed.}, urldate = {2023-10-06}, booktitle = {Proceedings of the {Second} {Workshop} on {Language} {Technologies} for {Historical} and {Ancient} {Languages}}, publisher = {European Language Resources Association}, author = {Sprugnoli, Rachele and Passarotti, Marco and Cecchini, Flavio Massimiliano and Fantoli, Margherita and Moretti, Giovanni}, month = jun, year = {2022}, pages = {183--188}, }
@inproceedings{passarotti_issues_2022, title = {Issues in {Building} the {LiLa} {Knowledge} {Base} of {Interoperable} {Linguistic} {Resources} for {Latin}}, url = {https://zenodo.org/doi/10.5281/zenodo.7263412}, author = {Passarotti, Marco and Mambrini, Francesco}, year = {2022}, }
@book{engelhardt_how_2022, title = {How to be {FAIR} with your data}, copyright = {https://creativecommons.org/licenses/by/4.0/}, url = {https://www.univerlag.uni-goettingen.de/handle/3/isbn-978-3-86395-539-7}, doi = {10.17875/gup2022-1915}, language = {eng}, urldate = {2023-10-02}, publisher = {Universitätsverlag Göttingen}, author = {Engelhardt, Claudia and Barthauer, Raisa and Biernacka, Katarzyna and Coffey, Aoife and Cornet, Ronald and Danciu, Alina and Demchenko, Yuri and Downes, Stephen and Erdmann, Christopher and Garbuglia, Federica and Germer, Kerstin and Helbig, Kerstin and Hellström, Margareta and Hettne, Kristina and Hibbert, Dawn and Jetten, Mijke and Karimova, Yulia and Hansen, Karsten Kryger and Kuusniemi, Mari Elisa and Letizia, Viviana and McCutcheon, Valerie and McGillivray, Barbara and Ostrop, Jenny and Petersen, Britta and Petrus, Ana and Reichmann, Stefan and Rettberg, Najla and Reverté, Carmen and Rochlin, Nick and Saenen, Bregt and Schmidt, Birgit and Scholten, Jolien and Shanahan, Hugh and Straube, Armin and Eynden, Veerle Van den and Vandendorpe, Justine and Venkataram, Shanmugasundaram and Vieira, André and Wiljes, Cord and Wuttke, Ulrike and Yeomans, Joanne and Zhou, Biru}, year = {2022}, }
@misc{wienrich_ai_2022, title = {{AI} {Literacy}: {Kompetenzdimensionen} und {Einflussfaktoren} im {Kontext} von {Arbeit}}, url = {https://www.denkfabrik-bmas.de/fileadmin/Downloads/Publikationen/AI_Literacy_Kompetenzdimensionen_und_Einflussfaktoren_im_Kontext_von_Arbeit.pdf}, author = {Wienrich, Carolin and Carolus, Astrid and Augustin, Yannik and Markus, André}, year = {2022}, }
@incollection{egger_natural_2022, address = {Cham}, series = {Tourism on the {Verge}}, title = {Natural {Language} {Processing} ({NLP}): {An} {Introduction}}, isbn = {978-3-030-88389-8}, shorttitle = {Natural {Language} {Processing} ({NLP})}, url = {https://doi.org/10.1007/978-3-030-88389-8_15}, abstract = {With the increase in internet usage, the amount of available textual data has also continued to increase rapidly. In addition, the development of stronger computers has enabled the processing of data to become much easier. The tourism field has a strong potential to utilize such data available on the internet; yet, on the other hand, a high proportion of available data is unlabelled and unprocessed. In order to use them effectively, new methods and new approaches are needed. In this regard, the area of Natural Language Processing (NLP) helps researchers to utilize textual data and develop an understanding of text analysis. By using machine learning approaches, text mining potential can expand enormously, leading to deeper insights, a better understanding of social phenomena, and, thus, also a better basis for decision-making. As such, this chapter will provide the reader with the basics of NLP as well as present the text pre-processing procedure in detail.}, language = {en}, urldate = {2023-09-14}, booktitle = {Applied {Data} {Science} in {Tourism}: {Interdisciplinary} {Approaches}, {Methodologies}, and {Applications}}, publisher = {Springer International Publishing}, author = {Egger, Roman and Gokce, Enes}, editor = {Egger, Roman}, year = {2022}, doi = {10.1007/978-3-030-88389-8_15}, keywords = {Feature extraction, NER, POS, Pre-processing, Text cleaning}, pages = {307--334}, }
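Since the chapter above centres on the text pre-processing procedure, a toy sketch of the usual steps (lowercasing, tokenization, stopword removal) in plain Python follows; the stopword list is illustrative only, not the chapter's.

<pre><code>
import re

STOPWORDS = {"the", "a", "an", "of", "in", "and", "is"}  # toy list

def preprocess(text):
    tokens = re.findall(r"[a-z]+", text.lower())      # lowercase + tokenize
    return [t for t in tokens if t not in STOPWORDS]  # drop stopwords

print(preprocess("The amount of available textual data is increasing."))
# -> ['amount', 'available', 'textual', 'data', 'increasing']
</code></pre>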
@inproceedings{peverelli_process_2022, address = {Antwerp}, title = {The {Process} of {Imitatio} {Through} {Stylometric} {Analysis}: the {Case} of {Terence}’s {Eunuchus}}, abstract = {The Early Modern Era is at the forefront of a widespread enthusiasm for Latin works: texts from classical antiquity are given new life, widely re-printed, studied and even repeatedly staged, in the case of dramas, throughout Europe. Also, new Latin comedies are again written in quantities never seen before (at least 10,000 works published 1500 to 1800 are known). The authors themselves, within the game of literary imitation (the process of imitatio), start to mimic the style of ancient authors, and Terence’s dramas in particular were considered the prime sources of reuse for many decades. Via a case study “the reception of Terence’s Eunuchus in Early Modern literature”, we take a deep dive into the mechanisms of literary imitation. Our analysis is based on four comedy corpora in Latin, Italian, French and English, spanning roughly 3 centuries (1400-1700). To assess the problem of language shift and multi-language intercorpora analysis, we base our experiments on translations of the Eunuchus, one for each sub-corpus. Through the use of tools drawn from the field of Stylometry, we address the topic of text reuse and textual similarities between Terence’s text and Early-Modern corpora to get a better grasp on the internal fluctuations of the imitation game between Early Modern and Classical authors.}, language = {en}, author = {Peverelli, Andrea and van Erp, Marieke and Bloemendal, Jan}, year = {2022}, pages = {337--354}, }
@misc{nagy_stylometric_2022, title = {Some {Stylometric} {Remarks} on {Ovid}'s {Heroides} and the {Epistula} {Sapphus}}, url = {http://arxiv.org/abs/2202.11864}, doi = {10.48550/arXiv.2202.11864}, abstract = {This article aims to contribute to two well-worn areas of debate in classical Latin philology, relating to Ovid's Heroides. The first is the question of the authenticity (and, to a lesser extent the correct position) of the letter placed fifteenth by almost every editor -- the so-called Epistula Sapphus (henceforth ES). The secondary question, although perhaps now less fervently debated, is the authenticity of the 'Double Heroides', placed by those who accept them as letters 16-21. I employ a variety of methods drawn from the domain of computational stylometry to consider the poetics and the lexico-grammatical features of these elegiac poems in the broader context of a corpus of 'shorter' (from 20 to 546 lines) elegiac works from five authors (266 poems in all) comprising more or less all of the non-fragmentary classical corpus. Based on a variety of techniques, every measure gives clear indication that the poetic style of the Heroides is Ovidian, but distinctive; they can be accurately isolated from Ovid more broadly. The Single and Double Heroides split into two clear groups, with the ES grouped consistently with the single letters. Furthermore, by comparing the style of the letters with the 'early' (although there are complications in this label) works of the Amores and the late works of the Ex Ponto, the evidence supports sequential composition -- meaning that the ES is correctly placed -- and, further, supports the growing consensus that the double letters were composed significantly later, in exile.}, urldate = {2023-08-26}, publisher = {arXiv}, author = {Nagy, Ben}, month = feb, year = {2022}, note = {arXiv:2202.11864 [cs]}, keywords = {Computer Science - Computation and Language}, }
@article{nagy_rhyme_2022, title = {Rhyme in classical {Latin} poetry: {Stylistic} or stochastic?}, volume = {37}, issn = {2055-7671}, shorttitle = {Rhyme in classical {Latin} poetry}, url = {https://doi.org/10.1093/llc/fqab105}, doi = {10.1093/llc/fqab105}, abstract = {This study offers the first broad quantitative analysis of the use of rhyme in classical Latin hexameter and elegiac verse. The data and tools developed for the analysis are released under a permissive open source license. These include software to create an accurate phonetic transcription of Latin verse from the Musisque Deoque corpus; a system for scoring rhyme via phonetic similarity; and a system for generating large amounts of metrically correct, stochastic Latin verse (useful for analysis baselines). Further to this, some initial analysis is performed: first via descriptive statistics and then with two unsupervised multivariate analyses using dimension reduction methods. The study examines nineteen works by twelve authors, comprising about 96,000 lines. First and foremost, the results suggest that rhyme was consciously used by classical authors, but to different extents and in different ways. There is a solid and detectable stylistic separation between the use of rhyme in elegy and epic, and possibly also between satire and the rest. Within genres, authors can be stylistically separated with a small set of features. On the negative side, it appears that the stylistic signal from rhyme is fairly faint, and so forensic analysis (e.g. for authorship attribution) is not presently recommended on texts that are shorter than several thousand lines.}, number = {4}, urldate = {2023-08-26}, journal = {Digital Scholarship in the Humanities}, author = {Nagy, Ben}, month = dec, year = {2022}, pages = {1097--1118}, }
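Not the paper's scoring system, which works on accurate phonetic transcriptions, but a loose stand-in that shows the shape of the idea: rate rhyme between two verse lines by string similarity of their endings.

<pre><code>
# Loose stand-in for rhyme scoring: similarity of the final letters
# of two verse lines, in place of the paper's phonetic similarity.
from difflib import SequenceMatcher

def rhyme_score(line_a, line_b, tail=3):
    a, b = line_a[-tail:].lower(), line_b[-tail:].lower()
    return SequenceMatcher(None, a, b).ratio()

print(rhyme_score("...venit ad litora", "...pulsa per ora"))  # shared "ora" ending -> 1.0
</code></pre>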
@misc{gebru_datasheets_2021, title = {Datasheets for {Datasets}}, url = {http://arxiv.org/abs/1803.09010}, doi = {10.48550/arXiv.1803.09010}, abstract = {The machine learning community currently has no standardized process for documenting datasets, which can lead to severe consequences in high-stakes domains. To address this gap, we propose datasheets for datasets. In the electronics industry, every component, no matter how simple or complex, is accompanied with a datasheet that describes its operating characteristics, test results, recommended uses, and other information. By analogy, we propose that every dataset be accompanied with a datasheet that documents its motivation, composition, collection process, recommended uses, and so on. Datasheets for datasets will facilitate better communication between dataset creators and dataset consumers, and encourage the machine learning community to prioritize transparency and accountability.}, urldate = {2024-09-03}, publisher = {arXiv}, author = {Gebru, Timnit and Morgenstern, Jamie and Vecchione, Briana and Vaughan, Jennifer Wortman and Wallach, Hanna and Daumé III, Hal and Crawford, Kate}, month = dec, year = {2021}, note = {arXiv:1803.09010 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Databases, Computer Science - Machine Learning}, }
@inproceedings{singh_pilot_2021, address = {Punta Cana, Dominican Republic (online)}, title = {A {Pilot} {Study} for {BERT} {Language} {Modelling} and {Morphological} {Analysis} for {Ancient} and {Medieval} {Greek}}, url = {https://aclanthology.org/2021.latechclfl-1.15}, doi = {10.18653/v1/2021.latechclfl-1.15}, abstract = {This paper presents a pilot study to automatic linguistic preprocessing of Ancient and Byzantine Greek, and morphological analysis more specifically. To this end, a novel subword-based BERT language model was trained on the basis of a varied corpus of Modern, Ancient and Post-classical Greek texts. Consequently, the obtained BERT embeddings were incorporated to train a fine-grained Part-of-Speech tagger for Ancient and Byzantine Greek. In addition, a corpus of Greek Epigrams was manually annotated and the resulting gold standard was used to evaluate the performance of the morphological analyser on Byzantine Greek. The experimental results show very good perplexity scores (4.9) for the BERT language model and state-of-the-art performance for the fine-grained Part-of-Speech tagger for in-domain data (treebanks containing a mixture of Classical and Medieval Greek), as well as for the newly created Byzantine Greek gold standard data set. The language models and associated code are made available for use at https://github.com/pranaydeeps/Ancient-Greek-BERT}, urldate = {2023-10-05}, booktitle = {Proceedings of the 5th {Joint} {SIGHUM} {Workshop} on {Computational} {Linguistics} for {Cultural} {Heritage}, {Social} {Sciences}, {Humanities} and {Literature}}, publisher = {Association for Computational Linguistics}, author = {Singh, Pranaydeep and Rutten, Gorik and Lefever, Els}, month = nov, year = {2021}, pages = {128--137}, }
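A hedged sketch of querying the released BERT language model via the Hugging Face fill-mask pipeline; the model id mirrors the GitHub handle given in the abstract and should be verified against the actual release.

<pre><code>
from transformers import pipeline

# Assumed model id, mirroring the GitHub handle in the abstract above.
fill = pipeline("fill-mask", model="pranaydeeps/Ancient-Greek-BERT")
for pred in fill("ἄνδρα μοι ἔννεπε, [MASK], πολύτροπον"):
    print(pred["token_str"], round(pred["score"], 3))
</code></pre>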
@misc{sprugnoli_sentiment_2021, title = {Sentiment {Analysis} for {Latin}: a {Journey} from {Seneca} to {Thomas} {Aquinas}}, shorttitle = {Sentiment {Analysis} for {Latin}}, url = {https://zenodo.org/record/4575431#.YKelWoMzbJw}, abstract = {While the main applications of resources and tools for sentiment analysis typically fall within the scope of fields like customer experience and social media monitoring, there is an increasing interest in extending their range to texts written in ancient and historical languages. Such interest mirrors the substantial growth of the area dedicated to building and using linguistic resources for these languages, which are essential for accessing and understanding the Classical tradition. In this talk, we will present the methodology we followed to create and evaluate a new set of Latin sentiment lexicons, and the process of inclusion of a prior polarity lexicon of Latin lemmas in a knowledge base of interoperable linguistic resources developed within the ERC project “LiLa: Linking Latin”. We will discuss the main challenges we face when working with ancient languages (e.g., lack of native speakers, limited amount of data, unusual textual genres for the sentiment analysis task, such as philosophical or documentary texts) and we will describe two use cases underscoring the importance of an interdisciplinary approach combining computational linguistics, semantic web and humanities practices.}, urldate = {2021-05-21}, author = {Sprugnoli, Rachele}, month = mar, year = {2021}, doi = {10.5281/zenodo.4575431}, keywords = {computational linguistics, latin language, sentiment analysis}, }
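The talk above concerns prior-polarity lexicons of Latin lemmas; a toy sketch of lexicon-based scoring follows. The lexicon entries here are invented placeholders, not LiLa data.

<pre><code>
# Toy prior-polarity scoring over lemmatized text; placeholder lexicon.
LEXICON = {"bonus": 1.0, "laetus": 1.0, "dolor": -1.0, "malus": -1.0}

def polarity(lemmas):
    return sum(LEXICON.get(lemma, 0.0) for lemma in lemmas)

print(polarity(["magnus", "dolor", "bonus"]))  # -> 0.0
</code></pre>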
@article{buccheri_semantic_2021, title = {Semantic {Analysis} and {Frequency} {Effects} of {Conceptual} {Metaphors} of {Emotions} in {Latin}: {From} a {Corpus}-{Based} {Approach} to a {Dictionary} of {Latin} {Metaphors}}, volume = {20}, doi = {10.1515/joll-2021-2002}, language = {English}, number = {2}, journal = {Journal of {Latin} {Linguistics}}, author = {Buccheri, Alessandro and De Felice, Irene and Fedriani, Chiara and Short, William M.}, year = {2021}, pages = {163--189}, }
@article{korkiakangas_late_2021, title = {Late {Latin} {Charter} {Treebank}: contents and annotation}, url = {https://researchportal.helsinki.fi/en/publications/late-latin-charter-treebank-contents-and-annotation}, journal = {Corpora}, volume = {16}, author = {Korkiakangas, Timo}, year = {2021}, }
@article{czeti_structure_2021, title = {The structure of narrative in the story of {Baucis} and {Philemon}}, volume = {61}, language = {English}, journal = {Acta Antiqua Academiae Scientiarum Hungaricae}, author = {Czeti, István}, year = {2021}, pages = {243--267}, }
@incollection{tahmasebi_lexical_2021, series = {Language {Variation}}, title = {Lexical semantic change for {Ancient} {Greek} and {Latin}}, copyright = {Copyright (c) 2021 Nina Tahmasebi, Lars Borin, Adam Jatowt, Yang Xu, Simon Hengchen (Volume Editor)}, isbn = {978-3-96110-312-6}, url = {https://langsci-press.org/catalog/view/303/3035/2382-1}, abstract = {Change and its precondition, variation, are inherent in languages. Over time, new words enter the lexicon, others become obsolete, and existing words acquire new senses. Associating a word with its correct meaning in its historical context is a central challenge in diachronic research. Historical corpora of classical languages, such as Ancient Greek and Latin, typically come with rich metadata, and existing models are limited by their inability to exploit contextual information beyond the document timestamp. While embedding-based methods feature among the current state of the art systems, they are lacking in their interpretative power. In contrast, Bayesian models provide explicit and interpretable representations of semantic change phenomena. In this chapter we build on GASC, a recent computational approach to semantic change based on a dynamic Bayesian mixture model. In this model, the evolution of word senses over time is based not only on distributional information of lexical nature, but also on text genres. We provide a systematic comparison of dynamic Bayesian mixture models for semantic change with state-of-the-art embedding-based models. On top of providing a full description of meaning change over time, we show that Bayesian mixture models are highly competitive approaches to detect binary semantic change in both Ancient Greek and Latin.}, language = {en}, number = {6}, urldate = {2023-07-24}, booktitle = {Computational approaches to semantic change}, publisher = {Language Science Press}, author = {Perrone, Valerio and Hengchen, Simon and Palma, Marco and Vatri, Alessandro and Smith, Jim Q. and McGillivray, Barbara}, editor = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang and Hengchen, Simon}, month = feb, year = {2021}, doi = {10.5281/zenodo.5040241}, note = {Publication Title: Language Science Press}, pages = {287--310}, }
@article{gledic_survey_2021, title = {Survey of curricula: {Linguistics} and language-related degrees in {Europe}}, shorttitle = {Survey of curricula}, url = {https://zenodo.org/record/5030861}, abstract = {The needs analysis of the UPSKILLS project is the foundation for all subsequent project activities, and the survey of curricula as its first step is designed to provide insights for finetuning the interventions and materials that will be designed during the lifetime of the project, as well as for enlarging the pool of stakeholders to whom the project results will be disseminated. The survey of curricula has several steps: drawing a list of European language and linguistics degrees from international ranking websites, selecting and analyzing a representative sample of degrees based on a set of indicators agreed upon by all partners, and additional studying of a selection of degrees that the partners identified as exemplary in the context of the UPSKILLS project.}, urldate = {2023-10-02}, author = {Gledić, Jelena and Đukanović, Maja and Miličević Petrović, Maja and van der Lek, Iulianna and Assimakopoulos, Stavros}, month = jun, year = {2021}, note = {Publisher: Zenodo}, }
@article{gledic_upskills_2021, title = {{UPSKILLS} guidelines for {Learning} {Content} {Creation}}, url = {https://zenodo.org/record/8302296}, abstract = {The core of the UPSKILLS project is the production of learning content aimed at students in language- and linguistics-related fields (modern languages and cultures, translation, general linguistics, etc.) and lecturers who want to incorporate the developed content and/or add their own, into their teaching. The topics are selected in light of a comparative analysis of the current academic offer and the requirements the job market has for graduates in these areas, conducted under the UPSKILLS project. The main focus is on the knowledge and skills that are insufficiently covered in existing linguistics and language-related curricula but can open new job perspectives for students. The created learning content can be used as individual elements or as an integrated module. These guidelines are created to serve as: Reference material for UPSKILLS project partners – the partners will consult the guidelines as they create the learning content in line with the project goals. Teaching guides for those using the materials we create – those who wish to use our materials can gain insight into our approach and methodology Learning content creation guides – for those who wish to create new materials based on the model we developed under UPSKILLS}, urldate = {2023-10-02}, author = {Gledić, Jelena and Assimakopoulos, Stavros and Buchberger, Iva and Budimirović, Jelena and Đukanović, Maja and Kraš, Tihana and Podboj, Martina and Soldatić, Nađa and Vella, Michela}, month = sep, year = {2021}, note = {Publisher: Zenodo}, }
@article{linka_pain_2021, title = {Pain and the {Body} in {Corpus} {Hippocraticum}: {A} {Distributional} {Semantic} {Analysis}}, copyright = {Copyright (c) 2021}, issn = {2364-7957}, shorttitle = {Pain and the {Body} in {Corpus} {Hippocraticum}}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/81212}, doi = {10.11588/dco.2021.7.81212}, abstract = {Die Autoren der im Corpus Hippocraticum versammelten medizinischen Abhandlungen erwähnen häufig den Schmerz, seine Eigenschaften und seinen Ursprung. Gleichzeitig liefern sie jedoch keine ausdrückliche Definition oder Theorie des Schmerzes, seiner Natur und seiner Beziehung zu anderen wichtigen Aspekten der hippokratischen Medizin. Außerdem verwenden sie mindestens vier Wortfamilien, von denen man annimmt, dass sie im Altgriechischen Schmerzen bezeichnen. Dies bringt moderne Forscher zu der Frage, wie sich diese vier Schmerzwörter semantisch unterscheiden und inwieweit sie auf einer gemeinsamen Vorstellung von Schmerz beruhen. In diesem Artikel versuchen wir, diese Fragen zu beantworten, indem wir das Korpus mit Hilfe verschiedener computergestützter Textanalysemethoden analysieren, insbesondere mit Hilfe eines Ansatzes zur distributionellen semantischen Modellierung. Unsere Ergebnisse zeigen einen engen Zusammenhang zwischen einigen dieser Schmerzwörter, Körperteilen und pathologischen Zuständen. Die Ergebnisse werden außerdem mit den Erkenntnissen verglichen, die durch traditionelles genaues Lesen der Quellen gewonnen wurden.}, language = {en}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Linka, Vojtěch and Kaše, Vojtěch}, month = sep, year = {2021}, keywords = {DSM}, pages = {54--71}, }
@article{nikolaev_considerations_2021, title = {Some {Considerations} on the {Attribution} of the ‘{New} {Apuleius}’}, volume = {71}, issn = {0009-8388, 1471-6844}, url = {https://www.cambridge.org/core/journals/classical-quarterly/article/some-considerations-on-the-attribution-of-the-new-apuleius/8F7FF4C1442452FC41CC6005E8501EB8}, doi = {10.1017/S0009838821000987}, abstract = {The ‘New Apuleius’ is a set of Latin summaries of Plato's works first published in 2016 by Justin Stover, who attributed it to Apuleius. The present article attempts to assess two key aspects of Stover's argument, viz. his reconstruction of the manuscript transmission of the new text and his use of computer-assisted stylometric techniques. The authors suggest that both strands of his argument are inconclusive. First, it is argued that the transposition of gatherings in the archetype of the Apuleian philosophica as envisaged by Stover is highly unrealistic. Second, replications of Stover's stylometric experiments show that their results are highly dependent on the particular algorithm settings and on the composition of the corpus. It is further shown that Stover's choice of highly specialized stylometric techniques is suboptimal, because popular generalist methods for statistical data analysis are demonstrably more successful in correctly identifying authors of Latin text fragments and do not support the case for Apuleius’ authorship of the new text. The authors conclude that there are no solid grounds to conclude that the ‘New Apuleius’ was indeed written by Apuleius.}, language = {en}, number = {2}, urldate = {2023-08-26}, journal = {The Classical Quarterly}, author = {Nikolaev, Dmitry and Shumilin, Mikhail}, month = dec, year = {2021}, note = {Publisher: Cambridge University Press}, keywords = {Apuleius, Burrows's Delta, attribution, computer-assisted stylometry, transmission, ‘New Apuleius’}, pages = {819--848}, }
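Since the entry's keywords name Burrows's Delta, a compact reference sketch of the measure: z-score each word's relative frequency against the comparison corpus, then average the absolute differences between two texts' profiles.

<pre><code>
import statistics

def burrows_delta(freqs_a, freqs_b, corpus_freqs):
    """corpus_freqs maps word -> list of relative frequencies, one per
    corpus text; freqs_a/freqs_b map word -> relative frequency."""
    total = 0.0
    for word, samples in corpus_freqs.items():
        mu = statistics.mean(samples)
        sigma = statistics.pstdev(samples) or 1.0   # guard zero variance
        z_a = (freqs_a.get(word, 0.0) - mu) / sigma
        z_b = (freqs_b.get(word, 0.0) - mu) / sigma
        total += abs(z_a - z_b)
    return total / len(corpus_freqs)
</code></pre>

A lower Delta between a disputed text and an author's profile is then read as stylistic proximity; the article's point is that such results can hinge on algorithm settings and corpus composition.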
@article{nagy_carmen_2021, title = {Carmen et {Standard} {Error}: {Computational} {Methods} in {Stylometry} for {Classical} {Latin} {Poetry}}, language = {en}, author = {Nagy, Benjamin C}, year = {2021}, }
@inproceedings{burns_profiling_2021, address = {Online}, title = {Profiling of {Intertextuality} in {Latin} {Literature} {Using} {Word} {Embeddings}}, url = {https://aclanthology.org/2021.naacl-main.389}, doi = {10.18653/v1/2021.naacl-main.389}, language = {en}, urldate = {2023-08-26}, booktitle = {Proceedings of the 2021 {Conference} of the {North} {American} {Chapter} of the {Association} for {Computational} {Linguistics}: {Human} {Language} {Technologies}}, publisher = {Association for Computational Linguistics}, author = {Burns, Patrick J. and Brofos, James A. and Li, Kyle and Chaudhuri, Pramit and Dexter, Joseph P.}, year = {2021}, pages = {4900--4907}, }
@inproceedings{sprugnoli_overview_2020, address = {Marseille, France}, title = {Overview of the {EvaLatin} 2020 {Evaluation} {Campaign}}, isbn = {979-10-95546-53-5}, url = {https://aclanthology.org/2020.lt4hala-1.16}, abstract = {This paper describes the first edition of EvaLatin, a campaign totally devoted to the evaluation of NLP tools for Latin. The two shared tasks proposed in EvaLatin 2020, i. e. Lemmatization and Part-of-Speech tagging, are aimed at fostering research in the field of language technologies for Classical languages. The shared dataset consists of texts taken from the Perseus Digital Library, processed with UDPipe models and then manually corrected by Latin experts. The training set includes only prose texts by Classical authors. The test set, alongside with prose texts by the same authors represented in the training set, also includes data relative to poetry and to the Medieval period. This also allows us to propose the Cross-genre and Cross-time subtasks for each task, in order to evaluate the portability of NLP tools for Latin across different genres and time periods. The results obtained by the participants for each task and subtask are presented and discussed.}, language = {English}, urldate = {2024-08-31}, booktitle = {Proceedings of {LT4HALA} 2020 - 1st {Workshop} on {Language} {Technologies} for {Historical} and {Ancient} {Languages}}, publisher = {European Language Resources Association (ELRA)}, author = {Sprugnoli, Rachele and Passarotti, Marco and Cecchini, Flavio Massimiliano and Pellegrini, Matteo}, editor = {Sprugnoli, Rachele and Passarotti, Marco}, month = may, year = {2020}, pages = {105--110}, }
@misc{berra_aurelberrastopwords_2020, title = {aurelberra/stopwords v2.3.0}, copyright = {Open Access}, url = {https://zenodo.org/record/1165205}, abstract = {This repository contains Ancient Greek and Latin stopwords for textual analysis.}, urldate = {2024-05-17}, publisher = {Zenodo}, author = {Berra, Aurélien}, month = may, year = {2020}, doi = {10.5281/ZENODO.1165205}, }
@inproceedings{stoeckel_voting_2020, address = {Marseille, France}, title = {Voting for {POS} {Tagging} of {Latin} {Texts}: {Using} the {Flair} of {FLAIR} to {Better} {Ensemble} {Classifiers} by {Example} of {Latin}}, url = {https://aclanthology.org/2020.lt4hala-1.21.pdf}, booktitle = {Proceedings of {LT4HALA} 2020 - 1st {Workshop} on {Language} {Technologies} for {Historical} and {Ancient} {Languages}}, publisher = {European Language Resources Association (ELRA)}, author = {Stoeckel, Manuel and Henlein, Alexander and Hemati, Wahed and Mehler, Alexander}, year = {2020}, pages = {130--135}, }
@inproceedings{straka_udpipe_2020, address = {Marseille, France}, title = {{UDPipe} at {EvaLatin} 2020: {Contextualized} embeddings and treebank embeddings}, url = {https://arxiv.org/pdf/2006.03687.pdf}, booktitle = {Proceedings of {LT4HALA} 2020 - 1st {Workshop} on {Language} {Technologies} for {Historical} and {Ancient} {Languages}}, publisher = {European Language Resources Association (ELRA)}, author = {Straka, Milan and Straková, Jana}, year = {2020}, pages = {124--129}, }
@article{gorman_author_2020, title = {Author {Identification} of {Short} {Texts} {Using} {Dependency} {Treebanks} without {Vocabulary}}, volume = {35}, doi = {10.1093/llc/fqz070}, number = {4}, journal = {Digital Scholarship in the Humanities}, author = {Gorman, Robert J.}, year = {2020}, pages = {812--825}, }
@inproceedings{yeruva_interpretation_2020, address = {Online}, title = {Interpretation of {Sentiment} {Analysis} in {Aeschylus}'s {Greek} {Tragedy}}, url = {https://www.aclweb.org/anthology/2020.latechclfl-1.17}, abstract = {Recent advancements in NLP and machine learning have created unique challenges and opportunities for digital humanities research. In particular, there are ample opportunities for NLP and machine learning researchers to analyze data from literary texts and to broaden our understanding of human sentiment in classical Greek tragedy. In this paper, we will explore the challenges and benefits from the human and machine collaboration for sentiment analysis in Greek tragedy and address some open questions related to the collaborative annotation for the sentiments in literary texts. We focus primarily on (i) an analysis of the challenges in sentiment analysis tasks for humans and machines, and (ii) whether consistent annotation results are generated from the multiple human annotators and multiple machine annotators. For human annotators, we have used a survey-based approach with about 60 college students. We have selected three popular sentiment analysis tools for machine annotators, including VADER, CoreNLP's sentiment annotator, and TextBlob. We have conducted a qualitative and quantitative evaluation and confirmed our observations on sentiments in Greek tragedy.}, urldate = {2021-05-21}, booktitle = {Proceedings of the {The} 4th {Joint} {SIGHUM} {Workshop} on {Computational} {Linguistics} for {Cultural} {Heritage}, {Social} {Sciences}, {Humanities} and {Literature}}, publisher = {International Committee on Computational Linguistics}, author = {Yeruva, Vijaya Kumari and ChandraShekar, Mayanka and Lee, Yugyung and Rydberg-Cox, Jeff and Blanton, Virginia and Oyler, Nathan A}, month = dec, year = {2020}, pages = {138--146}, }
@inproceedings{yeruva_interpretation_2020-1, title = {Interpretation of {Sentiment} {Analysis} with {Human}-in-the-{Loop}}, isbn = {1-72816-251-3}, publisher = {IEEE}, author = {Yeruva, Vijaya Kumari and Chandrashekar, Mayanka and Lee, Yugyung and Rydberg-Cox, Jeff and Blanton, Virginia and Oyler, Nathan A}, year = {2020}, pages = {3099--3108}, }
@inproceedings{sprugnoli_odi_2020, address = {Marseille, France}, title = {Odi et {Amo}. {Creating}, {Evaluating} and {Extending} {Sentiment} {Lexicons} for {Latin}}, url = {https://aclanthology.org/2020.lrec-1.376.pdf}, booktitle = {Proceedings of the {Twelfth} {Language} {Resources} and {Evaluation} {Conference}}, publisher = {European Language Resources Association}, author = {Sprugnoli, Rachele and Passarotti, Marco and Corbetta, Daniela and Peverelli, Andrea}, year = {2020}, pages = {3078--3086}, }
@inproceedings{hellwig_treebank_2020, address = {Marseille, France}, title = {The {Treebank} of {Vedic} {Sanskrit}}, url = {https://aclanthology.org/2020.lrec-1.632/}, booktitle = {Proceedings of the {Twelfth} {Language} {Resources} and {Evaluation} {Conference}}, publisher = {European Language Resources Association}, author = {Hellwig, Oliver and Scarlata, Salvatore and Widmer, Paul}, year = {2020}, pages = {5137--5146}, }
@inproceedings{papantoniou_nlp_2020, title = {{NLP} for the {Greek} {Language}: {A} {Brief} {Survey}}, doi = {10.1145/3411408.3411410}, booktitle = {11th {Hellenic} {Conference} on {Artificial} {Intelligence} ({SETN} 2020)}, publisher = {Association for Computing Machinery}, author = {Papantoniou, Katerina and Tzitzikas, Yannis}, year = {2020}, }
@inproceedings{pellegrini_using_2020, address = {Marseille, France}, title = {Using {LatInfLexi} for an {Entropy}-{Based} {Assessment} of {Predictability} in {Latin} {Inflection}}, booktitle = {Proceedings of {LT4HALA} 2020 - 1st {Workshop} on {Language} {Technologies} for {Historical} and {Ancient} {Languages}}, publisher = {European Language Resources Association (ELRA)}, author = {Pellegrini, Matteo}, year = {2020}, pages = {37--46}, }
@article{vayansky_review_2020, title = {A review of topic modeling methods}, volume = {94}, url = {https://www.researchgate.net/profile/Sathish_Kumar50/publication/342288300_A_review_of_topic_modeling_methods/links/5ef14381299bf1faac6f22f9/A-review-of-topic-modeling-methods.pdf}, doi = {10.1016/j.is.2020.101582}, abstract = {Topic modeling is a popular analytical tool for evaluating data. Numerous methods of topic modeling have been developed which consider many kinds of relationships and restrictions within datasets; however, these methods are not frequently employed. Instead many researchers gravitate to Latent Dirichlet Analysis, which although flexible and adaptive, is not always suited for modeling more complex data relationships. We present different topic modeling approaches capable of dealing with correlation between topics, the changes of topics over time, as well as the ability to handle short texts such as encountered in social media or sparse text data. We also briefly review the algorithms which are used to optimize and infer parameters in topic modeling, which is essential to producing meaningful results regardless of method. We believe this review will encourage more diversity when performing topic modeling and help determine what topic modeling method best suits the user needs.}, journal = {Information Systems}, author = {Vayansky, Ike and Kumar, Sathish}, month = jun, year = {2020}, pages = {1--32}, }
@inproceedings{long_what_2020, title = {What is {AI} literacy? {Competencies} and design considerations}, url = {https://dl.acm.org/doi/pdf/10.1145/3313831.3376727}, doi = {10.1145/3313831.3376727}, booktitle = {Proceedings of the 2020 {CHI} {Conference} on {Human} {Factors} in {Computing} {Systems}}, publisher = {Association for Computing Machinery}, author = {Long, Duri and Magerko, Brian}, year = {2020}, pages = {1--16}, }
@book{florio-hansen_digitalisierung_2020, title = {Digitalisierung, {Künstliche} {Intelligenz} und {Robotik}: {Eine} {Einführung} für {Schule} und {Unterricht}}, isbn = {978-3-8252-5429-2}, shorttitle = {Digitalisierung, {Künstliche} {Intelligenz} und {Robotik}}, url = {https://books.google.it/books?hl=en&lr=&id=HHUGEAAAQBAJ}, language = {de}, publisher = {UTB}, author = {Florio-Hansen, Inez De}, month = nov, year = {2020}, note = {Google-Books-ID: HHUGEAAAQBAJ}, }
@book{richards_fundamentals_2020, title = {Fundamentals of {Software} {Architecture}: {An} {Engineering} {Approach}}, isbn = {978-1-4920-4342-3}, shorttitle = {Fundamentals of {Software} {Architecture}}, url = {https://books.google.de/books?hl=de&lr=&id=xa7MDwAAQBAJ}, abstract = {Salary surveys worldwide regularly place software architect in the top 10 best jobs, yet no real guide exists to help developers become architects. Until now. This book provides the first comprehensive overview of software architecture’s many aspects. Aspiring and existing architects alike will examine architectural characteristics, architectural patterns, component determination, diagramming and presenting architecture, evolutionary architecture, and many other topics. Mark Richards and Neal Ford—hands-on practitioners who have taught software architecture classes professionally for years—focus on architecture principles that apply across all technology stacks. You’ll explore software architecture in a modern light, taking into account all the innovations of the past decade. This book examines: Architecture patterns: the technical basis for many architectural decisions; Components: identification, coupling, cohesion, partitioning, and granularity; Soft skills: effective team management, meetings, negotiation, presentations, and more; Modernity: engineering practices and operational approaches that have changed radically in the past few years; Architecture as an engineering discipline: repeatable results, metrics, and concrete valuations that add rigor to software architecture.}, language = {en}, publisher = {O'Reilly Media, Inc.}, author = {Richards, Mark and Ford, Neal}, month = jan, year = {2020}, note = {Google-Books-ID: xa7MDwAAQBAJ}, keywords = {Computers / Software Development \& Engineering / General, Computers / Software Development \& Engineering / Systems Analysis \& Design, Computers / Software Development \& Engineering / Tools, Computers / Systems Architecture / Distributed Systems \& Computing}, }
@article{diemke_alkibiades_2020, title = {Alkibiades, {Pyrrhos} und {Alexander}: {Eine} {Untersuchung} zu {Emotionen} und {Gewalt} in den {Viten} {Plutarchs} unter {Verwendung} digitaler {Methoden}}, copyright = {Copyright (c) 2020 Digital Classics Online}, issn = {2364-7957}, shorttitle = {Alkibiades, {Pyrrhos} und {Alexander}}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/77663}, doi = {10.11588/dco.2020.2.77663}, abstract = {Forscher haben die Bedeutung von Emotionen in Plutarchs Biographien weitgehend ignoriert, obwohl Emotionen für die Entstehung von Gewalt eine entscheidende Rolle spielen. Mit Hilfe von ERIS, einem Hamburger Informationssystem zur Darstellung griechischer und römischer Gewalt, werden Gewaltdarstellungen, die auf ein emotionales Motiv zurückgehen, in den Biographien von Alkibiades, Pyrrhos und Alexander untersucht. Durch eine Visualisierung lassen sich Muster und Beziehungen zwischen den Objekten und Merkmalen schneller erkennen. Die Ergebnisse zeigen, wie digitale Werkzeuge dazu beitragen können, neue Beziehungen zwischen Opfer, Täter, Waffe, Gewaltmethode und Motiv aufzudecken. Darüber hinaus zeigen die Ergebnisse, wie stark der wachsende Machteinfluss und das Fehlen von Paideia das Gewaltverhalten und die fehlende Selbstkontrolle der Protagonisten forcieren können. Die Untersuchung soll das Erkenntnispotenzial und den Mehrwert, der aus der Anwendung von digitaler und hermeneutischer Analyse resultiert, aufzeigen.}, language = {de}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Diemke, Justine}, month = dec, year = {2020}, keywords = {Informationssystem}, pages = {57--74}, }
@inproceedings{nicolosi_clarin-it_2020, title = {{CLARIN}-{IT} and the {Definition} of a {Digital} {Critical} {Edition} for {Ancient} {Greek} {Poetry}}, url = {https://ep.liu.se/en/conference-article.aspx?series=ecp&issue=172&Article_No=11}, doi = {10.3384/ecp2020172011}, abstract = {Ancient Greek studies, and Classics in general, is a perfect field of investigation in Digital Humanities. Indeed, DH approaches could become a means of building models for complex realities, analyzing them with computational methods and sharing the results with a broader public. Ancient texts have a complex tradition, which includes many witnesses (texts that handed down other texts) and different typologies of supports (papyri, manuscripts, and epigraphs). These texts are the basis of all European Literatures and it is crucial to spread their knowledge, in a reliable and easy way. Our project on ancient Greek fragmentary poetry (DEA - Digital Edition of Archilochus: New models and tools for authoring, editing and indexing an ancient Greek fragmentary author), growing out of the existing experience, tries to define a TEI-based digital critical edition combined with NLP techniques and semantic web technologies. Our goal is to provide a complete and reliable tool for scholars, suitable for critical studies in Classics, and a user-friendly environment also for non-specialist users. The project represents one of the attempts within the context of CLARIN-IT to contribute to the wider impact of CLARIN on the specific Italian community interested in Digital Classics. It is intended to improve services in fostering new knowledge in SSH digital research and sustaining the existing one.}, language = {en}, urldate = {2023-08-26}, author = {Nicolosi, Anika and Monachini, Monica and Nova, Beatrice}, month = jul, year = {2020}, pages = {85--93}, }
@article{ribary_corpus_2020, title = {A {Corpus} {Approach} to {Roman} {Law} {Based} on {Justinian}’s {Digest}}, volume = {7}, copyright = {http://creativecommons.org/licenses/by/3.0/}, issn = {2227-9709}, url = {https://www.mdpi.com/2227-9709/7/4/44}, doi = {10.3390/informatics7040044}, abstract = {Traditional philological methods in Roman legal scholarship such as close reading and strict juristic reasoning have analysed law in extraordinary detail. Such methods, however, have paid less attention to the empirical characteristics of legal texts and occasionally projected an abstract framework onto the sources. The paper presents a series of computer-assisted methods to open new frontiers of inquiry. Using a Python coding environment, we have built a relational database of the Latin text of the Digest, a historical sourcebook of Roman law compiled under the order of Emperor Justinian in 533 CE. Subsequently, we investigated the structure of Roman law by automatically clustering the sections of the Digest according to their linguistic profile. Finally, we explored the characteristics of Roman legal language according to the principles and methods of computational distributional semantics. Our research has discovered an empirical structure of Roman law which arises from the sources themselves and complements the dominant scholarly assumption that Roman law rests on abstract structures. By building and comparing Latin word embeddings models, we were also able to detect a semantic split in words with general and legal sense. These investigations point to a practical focus in Roman law which is consistent with the view that ancient law schools were more interested in training lawyers for practice rather than in philosophical neatness.}, language = {en}, number = {4}, urldate = {2023-08-26}, journal = {Informatics}, author = {Ribary, Marton and McGillivray, Barbara}, month = dec, year = {2020}, note = {Number: 4 Publisher: Multidisciplinary Digital Publishing Institute}, keywords = {Digest, Latin, LatinISE, Python, Roman law, clustering, computational linguistics, corpus linguistics, distributional semantics, word embeddings}, pages = {44}, }
@article{burns_ensemble_2020, title = {Ensemble lemmatization with the {Classical} {Language} {Toolkit}}, volume = {58}, copyright = {Copyright (c) 2020 Studi e Saggi Linguistici}, issn = {2281-9142}, url = {https://studiesaggilinguistici.it/ssl/article/view/273}, doi = {10.4454/ssl.v58i1.273}, abstract = {Because of the less-resourced nature of historical languages, non-standard solutions are often required for natural language processing tasks. This article introduces one such solution for historical-language lemmatization, that is the Ensemble lemmatizer for the Classical Language Toolkit, an open-source Python package that supports NLP research for historical languages. Ensemble lemmatization is the most recent development at CLTK in the repurposing and refactoring of an existing method designed for one task, specifically the backoff method as used for part-of-speech tagging, for use in a different task, namely lemmatization. This article argues for the benefits of ensemble lemmatization, specifically, flexible tool construction and the use of all available information to reach tagging decisions, and presents two use cases.}, language = {en}, number = {1}, urldate = {2023-08-26}, journal = {Studi e Saggi Linguistici}, author = {Burns, Patrick J.}, month = sep, year = {2020}, note = {Number: 1}, pages = {157--176}, }
@article{sprugnoli_building_2020, title = {Building and {Comparing} {Lemma} {Embeddings} for {Latin}. {Classical} {Latin} versus {Thomas} {Aquinas}}, volume = {6}, copyright = {https://creativecommons.org/licenses/by-nc-nd/4.0/}, issn = {2499-4553}, url = {https://journals.openedition.org/ijcol/624}, doi = {10.4000/ijcol.624}, abstract = {This paper presents a new set of lemma embeddings for the Latin language. Embeddings are trained on a manually annotated corpus of texts belonging to the Classical era: different models, architectures and dimensions are tested and evaluated using a novel benchmark for the synonym selection task. In addition, we release vectors pre-trained on the “Opera Maiora” by Thomas Aquinas, thus providing a resource to analyze Latin in a diachronic perspective. The embeddings built upon the two training corpora are compared to each other to support diachronic lexical studies. The words showing the highest usage change between the two corpora are reported and a selection of them is discussed.}, language = {en}, number = {1}, urldate = {2023-08-26}, journal = {IJCoL. Italian Journal of Computational Linguistics}, author = {Sprugnoli, Rachele and Moretti, Giovanni and Passarotti, Marco}, month = jun, year = {2020}, note = {Number: 1 Publisher: Accademia University Press}, pages = {29--45}, }
@misc{bamman_latin_2020, title = {Latin {BERT}: {A} {Contextual} {Language} {Model} for {Classical} {Philology}}, shorttitle = {Latin {BERT}}, url = {http://arxiv.org/abs/2009.10053}, abstract = {We present Latin BERT, a contextual language model for the Latin language, trained on 642.7 million words from a variety of sources spanning the Classical era to the 21st century. In a series of case studies, we illustrate the affordances of this language-specific model both for work in natural language processing for Latin and in using computational methods for traditional scholarship: we show that Latin BERT achieves a new state of the art for part-of-speech tagging on all three Universal Dependency datasets for Latin and can be used for predicting missing text (including critical emendations); we create a new dataset for assessing word sense disambiguation for Latin and demonstrate that Latin BERT outperforms static word embeddings; and we show that it can be used for semanticallyinformed search by querying contextual nearest neighbors. We publicly release trained models to help drive future work in this space.}, language = {en}, urldate = {2023-08-26}, publisher = {arXiv}, author = {Bamman, David and Burns, Patrick J.}, month = sep, year = {2020}, note = {arXiv:2009.10053 [cs]}, keywords = {Computer Science - Computation and Language}, }
@article{min_modeling_2019, title = {Modeling narrative structure and dynamics with networks, sentiment analysis, and topic modeling}, volume = {14}, url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0226025}, doi = {10.1371/journal.pone.0226025}, number = {12}, journal = {PLoS ONE}, author = {Min, Semi and Park, Juyong}, year = {2019}, pages = {e0226025}, }
@inproceedings{sprugnoli_vir_2019, title = {Vir is to {Moderatus} as {Mulier} is to {Intemperans}. {Lemma} {Embeddings} for {Latin}}, url = {https://ceur-ws.org/Vol-2481/paper69.pdf}, booktitle = {Proceedings of the {Sixth} {Italian} {Conference} on {Computational} {Linguistics} ({CLiC}-it 2019)}, publisher = {CEUR-WS.org}, author = {Sprugnoli, Rachele and Passarotti, Marco and Moretti, Giovanni}, year = {2019}, }
@inproceedings{mitchell_model_2019, title = {Model {Cards} for {Model} {Reporting}}, url = {http://arxiv.org/abs/1810.03993}, doi = {10.1145/3287560.3287596}, abstract = {Trained machine learning models are increasingly used to perform high-impact tasks in areas such as law enforcement, medicine, education, and employment. In order to clarify the intended use cases of machine learning models and minimize their usage in contexts for which they are not well suited, we recommend that released models be accompanied by documentation detailing their performance characteristics. In this paper, we propose a framework that we call model cards, to encourage such transparent model reporting. Model cards are short documents accompanying trained machine learning models that provide benchmarked evaluation in a variety of conditions, such as across different cultural, demographic, or phenotypic groups (e.g., race, geographic location, sex, Fitzpatrick skin type) and intersectional groups (e.g., age and race, or sex and Fitzpatrick skin type) that are relevant to the intended application domains. Model cards also disclose the context in which models are intended to be used, details of the performance evaluation procedures, and other relevant information. While we focus primarily on human-centered machine learning models in the application fields of computer vision and natural language processing, this framework can be used to document any trained machine learning model. To solidify the concept, we provide cards for two supervised models: One trained to detect smiling faces in images, and one trained to detect toxic comments in text. We propose model cards as a step towards the responsible democratization of machine learning and related AI technology, increasing transparency into how well AI technology works. We hope this work encourages those releasing trained machine learning models to accompany model releases with similar detailed evaluation numbers and other relevant documentation.}, urldate = {2024-06-01}, booktitle = {Proceedings of the {Conference} on {Fairness}, {Accountability}, and {Transparency}}, author = {Mitchell, Margaret and Wu, Simone and Zaldivar, Andrew and Barnes, Parker and Vasserman, Lucy and Hutchinson, Ben and Spitzer, Elena and Raji, Inioluwa Deborah and Gebru, Timnit}, month = jan, year = {2019}, note = {arXiv:1810.03993 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning}, pages = {220--229}, }
@article{rodda_vector_2019, title = {Vector space models of {Ancient} {Greek} word meaning, and a case study on {Homer}}, volume = {60}, issn = {1248-9433}, url = {https://aclanthology.org/2019.tal-3.4.pdf}, number = {3}, journal = {Traitement Automatique des Langues}, author = {Rodda, M and Probert, Philomen and McGillivray, Barbara}, year = {2019}, note = {Publisher: Lavoisier}, }
@inproceedings{devlin_bert_2019, address = {Minneapolis, Minnesota}, title = {{BERT}: {Pre}-training of {Deep} {Bidirectional} {Transformers} for {Language} {Understanding}}, shorttitle = {{BERT}}, url = {https://www.aclweb.org/anthology/N19-1423}, doi = {10.18653/v1/N19-1423}, abstract = {We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models (Peters et al., 2018a; Radford et al., 2018), BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5 (7.7 point absolute improvement), MultiNLI accuracy to 86.7\% (4.6\% absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement).}, urldate = {2020-11-25}, booktitle = {Proceedings of the 2019 {Conference} of the {North} {American} {Chapter} of the {Association} for {Computational} {Linguistics}: {Human} {Language} {Technologies}, {Volume} 1 ({Long} and {Short} {Papers})}, publisher = {Association for Computational Linguistics}, author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, month = jun, year = {2019}, pages = {4171--4186}, }
@inproceedings{bolt_stylometry_2019, address = {Hong Kong, China}, title = {A {Stylometry} {Toolkit} for {Latin} {Literature}}, booktitle = {Proceedings of the 2019 {Conference} on {Empirical} {Methods} in {Natural} {Language} {Processing} and the 9th {International} {Joint} {Conference} on {Natural} {Language} {Processing} ({EMNLP}-{IJCNLP}): {System} {Demonstrations}}, publisher = {Association for Computational Linguistics}, author = {Bolt, Thomas J. and Flynt, Jeffrey H. and Chaudhuri, Pramit and Dexter, Joseph P}, year = {2019}, pages = {205--210}, }
@book{schubert_platon_2019, title = {Platon digital: {Tradition} und {Rezeption}}, isbn = {978-3-947450-07-7}, shorttitle = {Platon digital}, url = {https://nbn-resolving.org/urn:nbn:de:101:1-2019070314102240510161}, abstract = {Platon ist nach Homer der antike Autor mit der reichhaltigsten Rezeption vom Altertum über das Mittelalter bis in die Neuzeit. Gleichwohl und gerade aus diesem Grund ist diese bisher allenfalls bruchstückhaft aufgearbeitet worden. Die Autoren versuchen, diesem alten Ziel geisteswissenschaftlicher Forschung auf neuen Wegen näherzukommen, indem sie eine informationswissenschaftliche Perspektive auf Platon und seine Rezeption anwenden. Dazu sind innovative Methoden der Paraphrasensuche entwickelt worden, um diese auch als Methode altertumswissenschaftlich und kulturwissenschaftlich interessierter Forschung zu etablieren.}, language = {German}, urldate = {2020-02-17}, editor = {Schubert, Charlotte and Molitor, Paul and Ritter, Jörg and Scharloth, Joachim and Sier, Kurt}, year = {2019}, keywords = {reference}, }
@book{berti_digital_2019, address = {Berlin}, series = {Age of {Access}? {Grundfragen} der {Informationsgesellschaft}}, title = {Digital classical philology: {Ancient} {Greek} and {Latin} in the digital revolution}, volume = {10}, isbn = {978-3-11-059678-6}, abstract = {Thanks to the digital revolution, even a traditional discipline like philology has been enjoying a renaissance within academia and beyond. Decades of work have been producing groundbreaking results, raising new research questions and creating innovative educational resources. This book describes the rapidly developing state of the art of digital philology with a focus on Ancient Greek and Latin, the classical languages of Western culture. Contributions cover a wide range of topics about the accessibility and analysis of Greek and Latin sources. The discussion is organized in five sections concerning open data of Greek and Latin texts; catalogs and citations of authors and works; data entry, collection and analysis for classical philology; critical editions and annotations of sources; and finally linguistic annotations and lexical databases. As a whole, the volume provides a comprehensive outline of an emergent research field for a new generation of scholars and students, explaining what is reachable and analyzable that was not before in terms of technology and accessibility.}, publisher = {Walter de Gruyter \& Co}, editor = {Berti, Monica}, year = {2019}, keywords = {Humanités digitales, Numérisation, Philologie classique, reference}, }
@article{vainio_reconsidering_2019, title = {Reconsidering {Authorship} in the {Ciceronian} {Corpus} through {Computational} {Authorship} {Attribution}}, volume = {3}, url = {https://ojs.unito.it/index.php/COL/article/view/3518/3182}, language = {en}, number = {1}, journal = {Ciceroniana online}, author = {Vainio, Raija and Välimäki, Reima and Vesanto, Aleksi and Hella, Anni and Kaartinen, Marjo and Immonen, Teemu}, year = {2019}, pages = {15--48}, }
@inproceedings{keersmaekers_creating_2019, title = {Creating, {Enriching} and {Valorizing} {Treebanks} of {Ancient} {Greek}}, url = {https://syntaxfest.github.io/syntaxfest19/proceedings/papers/paper_68.pdf}, booktitle = {Proceedings of the 18th {International} {Workshop} on {Treebanks} and {Linguistic} {Theories} ({TLT}, {SyntaxFest} 2019)}, publisher = {Association for Computational Linguistics}, author = {Keersmaekers, Alek and Mercelis, Wouter and Swaelens, Colin and Van Hal, Toon}, year = {2019}, pages = {109--117}, }
@misc{beyer_teaching_2019, address = {Berlin}, type = {Talk}, title = {Teaching {Digital} {Literacy} – {Interpretieren} in einer computergestützten {Lehr}-/{Lernumgebung}}, url = {https://doi.org/10.5281/zenodo.3674815}, language = {de}, author = {Beyer, Andrea and Reichetanz, Paul}, month = mar, year = {2019}, doi = {10.5281/zenodo.3674815}, keywords = {reference}, }
@inproceedings{franzini_nunc_2019, title = {Nunc {Est} {Aestimandum}: {Towards} an {Evaluation} of the {Latin} {WordNet}}, url = {https://www.researchgate.net/profile/Greta-Franzini-2/publication/336799230_Nunc_Est_Aestimandum_Towards_an_Evaluation_of_the_Latin_WordNet/links/5db2be42299bf111d4c83184/Nunc-Est-Aestimandum-Towards-an-Evaluation-of-the-Latin-WordNet.pdf}, author = {Franzini, Greta and Peverelli, Andrea and Ruffolo, Paolo and Passarotti, Marco and Sanna, Helena and Signoroni, Edoardo and Ventura, Viviana and Zampedri, Federica}, year = {2019}, }
@incollection{pockelmann_word_2019, title = {Word {Mover}’s {Distance} angewendet auf die {Paraphrasenextraktion} im {Altgriechischen}}, url = {https://books.ub.uni-heidelberg.de/propylaeum/reader/download/451/451-30-84795-1-10-20190507.pdf}, booktitle = {Platon {Digital}. {Tradition} und {Rezeption}}, publisher = {Propylaeum Heidelberg}, author = {Pöckelmann, Marcus and Ritter, Jörg and Molitor, Paul}, editor = {Schubert, Charlotte and Molitor, Paul and Ritter, Jörg and Sier, Kurt and Scharloth, Joachim}, year = {2019}, pages = {45--60}, }
@article{schubert_visualisierung_2019, title = {Visualisierung von {Textdaten}: {Die} {Falle} der {Metadaten} am {Beispiel} von {Iamblichs} {Protreptikos}}, copyright = {Copyright (c) 2019 Digital Classics Online}, issn = {2364-7957}, shorttitle = {Visualisierung von {Textdaten}}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/59356}, doi = {10.11588/dco.2019.1.59356}, abstract = {„Digital Humanities analysieren nicht nur Bilder, sondern produzieren auch neue Bilder“[1] – diese alltägliche Feststellung beleuchtet einen Prozeß, dessen Verlauf einerseits noch ganz offen ist, da diese neuen Repräsentationsmöglichkeiten epistemisch keineswegs erfaßt sind, geschweige denn, daß die Entwicklung in diesem Bereich zu stabilen Praktiken geführt hätte. Andererseits zeigt sich ein unhinterfragter Siegeslauf, der auch schon zu einem neuen Feld wie dem der Visualization Literacy geführt hat. Im vorliegenden Beitrag wird die Visualisierung anhand von Metadaten untersucht. Gerade die Metadaten sind heute im Kontext der großen Datenmengen, die als ‚Big Data‘ anfallen, von größter Bedeutung. Die für die Analyse von Big Data notwendigen Aggregationen von Daten sind ohne Metadaten nicht effizient und leistungsstark durchzuführen. Über die normale Datenerfassung hinaus legen Metadaten Muster offen, die sonst nicht sichtbar wären. Dies wiederum wird über ‚Verbildlichung’ als einer heute gängigen Repräsentationsform ermöglicht: Gerade auch für Texte gilt, daß sie in praktischen Anwendungen auf der Grundlage ihrer Metadaten durch Visualisierung zu Bildern werden, die quantitativ ausgewertet werden können und so wiederum auch in den Forschungsdiskurs eingehen. Von diesen gängigen Vorgehen ausgehend, stellt sich die Frage, ob diese gegenwärtige Praxis wissenschaftlichen Ansprüchen genügt oder ob sich derzeit nicht vielmehr – in einer Zeit, in der Daten als das Öl oder Gold des 21. Jahrhunderts betrachtet werden – eine Art Goldgräberstimmung und ein entsprechend unkritisches Verhalten etablieren. [1] Kwastek 2015; vgl. Kath et al. 2015.}, language = {de}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Schubert, Charlotte}, month = jun, year = {2019}, keywords = {Protreptikos}, pages = {4--21}, }
@article{riess_violence_2019, title = {Violence and the {Sea}: {A} {Digital} {Analysis} of {Maritime} {Acts} of {Violence} {Committed} by {Alcibiades} as {Described} by {Thucydides}, {Xenophon}, and {Plutarch}}, copyright = {Copyright (c) 2020}, issn = {2364-7957}, shorttitle = {Violence and the {Sea}}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/72018}, doi = {10.11588/dco.2019.2.72018}, abstract = {Beim Vergleich der Gewaltmuster, die von Alkibiades in den Werken von Thukydides, Plutarch und Xenophon ausgeübt wurden, treten signifikante Unterschiede im Fokus der drei Autoren zutage, die durch "Eris. Das Hamburger Informationssystem über die Darstellung griechischer und römischer Gewalt" visuell dargestellt werden können. Die Tatsache, dass die Interpretation der graphischen Befunde – also der Kategorien zweiter Ordnung – dennoch zu sehr plausiblen Ergebnissen führt, zeigt, dass dieser Proof of Concept erfolgreich war. Die plausiblen Ergebnisse legen auch nahe, dass wir durch die Verwendung von Eris bisher unentdeckte Gewaltmuster bei der Untersuchung der großen Daten antiker Texte finden werden. Die Grafiken werden somit als Inspirationsquellen dienen, die neue Fragen aufwerfen, die aufgrund der großen Datenmengen noch nicht in unser Denken eingedrungen sind.}, language = {en}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Riess, Werner}, year = {2019}, keywords = {Visualisierung}, pages = {4--27}, }
@article{chaudhuri_small_2019, title = {A small set of stylometric features differentiates {Latin} prose and verse}, volume = {34}, issn = {2055-7671}, url = {https://doi.org/10.1093/llc/fqy070}, doi = {10.1093/llc/fqy070}, abstract = {Identifying the stylistic signatures characteristic of different genres is of central importance to literary theory and criticism. In this article we report a large-scale computational analysis of Latin prose and verse using a combination of quantitative stylistics and supervised machine learning. We train a set of classifiers to differentiate prose and poetry with high accuracy (>97\%) based on a set of twenty-six text-based, primarily syntactic features and rank the relative importance of these features to identify a low-dimensional set still sufficient to achieve excellent classifier performance. This analysis demonstrates that Latin prose and verse can be classified effectively using just three top features. From examination of the highly ranked features, we observe that measures of the hypotactic style favored in Latin prose (i.e. subordinating constructions in complex sentences, such as relative clauses) are especially useful for classification.}, number = {4}, urldate = {2023-08-26}, journal = {Digital Scholarship in the Humanities}, author = {Chaudhuri, Pramit and Dasgupta, Tathagata and Dexter, Joseph P and Iyer, Krithika}, month = dec, year = {2019}, pages = {716--729}, }
@article{mcgillivray_computational_2019, title = {A computational approach to lexical polysemy in {Ancient} {Greek}}, volume = {34}, issn = {2055-7671}, url = {https://doi.org/10.1093/llc/fqz036}, doi = {10.1093/llc/fqz036}, abstract = {Language is a complex and dynamic system. If we consider word meaning, which is the scope of lexical semantics, we observe that some words have several meanings, thus displaying lexical polysemy. In this article, we present the first phase of a project that aims at computationally modelling Ancient Greek semantics over time. Our system is based on Bayesian learning and on the Diorisis Ancient Greek corpus, which we have built for this purpose. We illustrate preliminary results in light of expert annotation, and take this opportunity to discuss the role of computational systems and human analysis in a complex research area like historical semantics. On the one hand, computational approaches allow us to model large corpora of texts. On the other hand, a long and rich scholarly tradition in Ancient Greek has provided us with valuable insights into the mechanisms of semantic change (cf. e.g. Leiwo, M. (2012). Introduction: variation with multiple faces. In Leiwo, M., Halla-aho, H., and Vierros, M. (eds), Variation and Change in Greek and Latin, Helsinki: Suomen Ateenan-instituutin säätiö, pp. 1–11.). In this article, we show that these qualitative analyses can be leveraged to support and complement the computational modelling.}, number = {4}, urldate = {2023-08-26}, journal = {Digital Scholarship in the Humanities}, author = {McGillivray, Barbara and Hengchen, Simon and Lähteenoja, Viivi and Palma, Marco and Vatri, Alessandro}, month = dec, year = {2019}, pages = {893--907}, }
@misc{nagy_metre_2019, title = {Metre as a stylometric feature in {Latin} hexameter poetry}, url = {http://arxiv.org/abs/1911.12478}, abstract = {This paper demonstrates that metre is a privileged indicator of authorial style in classical Latin hexameter poetry. Using only metrical features, pairwise classification experiments are performed between 5 first-century authors (10 comparisons) using four different machine-learning models. The results showed a two-label classification accuracy of at least 95\% with samples as small as ten lines and no greater than eighty lines (up to around 500 words). These sample sizes are an order of magnitude smaller than those typically recommended for BOW ('bag of words') or n-gram approaches, and the reported accuracy is outstanding. Additionally, this paper explores the potential for novelty (forgery) detection, or 'one-class classification'. An analysis of the disputed Aldine Additamentum (Sil. Ital. Puni. 8:144-225) concludes (p=0.0013) that the metrical style differs significantly from that of the rest of the poem.}, language = {en}, urldate = {2023-08-26}, publisher = {arXiv}, author = {Nagy, Benjamin}, month = dec, year = {2019}, note = {arXiv:1911.12478 [cs, stat]}, keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning, Statistics - Applications}, }
@misc{assael_restoring_2019, title = {Restoring ancient text using deep learning: a case study on {Greek} epigraphy}, shorttitle = {Restoring ancient text using deep learning}, url = {http://arxiv.org/abs/1910.06262}, abstract = {Ancient History relies on disciplines such as Epigraphy, the study of ancient inscribed texts, for evidence of the recorded past. However, these texts, “inscriptions”, are often damaged over the centuries, and illegible parts of the text must be restored by specialists, known as epigraphists. This work presents PYTHIA, the first ancient text restoration model that recovers missing characters from a damaged text input using deep neural networks. Its architecture is carefully designed to handle longterm context information, and deal efficiently with missing or corrupted character and word representations. To train it, we wrote a nontrivial pipeline to convert PHI, the largest digital corpus of ancient Greek inscriptions, to machine actionable text, which we call PHI-ML. On PHI-ML, PYTHIA’s predictions achieve a 30.1\% character error rate, compared to the 57.3\% of human epigraphists. Moreover, in 73.5\% of cases the ground-truth sequence was among the Top-20 hypotheses of PYTHIA, which effectively demonstrates the impact of this assistive method on the field of digital epigraphy, and sets the state-of-the-art in ancient text restoration.}, language = {en}, urldate = {2023-01-26}, publisher = {arXiv}, author = {Assael, Yannis and Sommerschield, Thea and Prag, Jonathan}, month = oct, year = {2019}, note = {arXiv:1910.06262 [cs]}, keywords = {Computer Science - Computation and Language, Computer Science - Computers and Society}, }
@incollection{cayless_sustaining_2019, address = {Berlin}, series = {Age of {Access}? {Grundfragen} der {Informationsgesellschaft}}, title = {Sustaining {Linked} {Ancient} {World} {Data}}, volume = {10}, abstract = {May 31st, 2018 marked the sixth anniversary of the Linked Ancient World Data Institute (LAWDI), a workshop funded by the US National Endowment For the Humanities. This makes it a good time to take stock of the Ancient World Linked Data initiatives that have been around for some time, as well as some that have foundered and some that are new. What makes for sustainable Linked Open Data? Why do some initiatives thrive while others fail? What resources do successful LOD sites need, and how may they be obtained? The promise of LOD is that it frees our information from the silos in which it is housed, permitting cross-system interactions that improve the quality and usefulness of the information in any single system. This article will take the broader view of the definition of Linked Data suggested by Tim Berners-Lee’s foundational “Linked Data – Design Issues” paper, as encompassing more types of data than simply RDF and other “Semantic Web” technologies. This view of LOD is pragmatic and leverages the strengths of semantic technologies while avoiding their weaknesses.}, booktitle = {Digital classical philology: {Ancient} {Greek} and {Latin} in the digital revolution}, publisher = {Walter de Gruyter \& Co}, author = {Cayless, Hugh A.}, editor = {Berti, Monica}, year = {2019}, pages = {35--50}, }
@inproceedings{celano_standoff_2019, address = {New York, NY, USA}, series = {{DATeCH2019}}, title = {Standoff {Annotation} for the {Ancient} {Greek} and {Latin} {Dependency} {Treebank}}, isbn = {978-1-4503-7194-0}, url = {https://doi.org/10.1145/3322905.3322919}, doi = {10.1145/3322905.3322919}, abstract = {This contribution presents the work in progress to convert the Ancient Greek and Latin Dependency Treebank (AGLDT) into standoff annotation using PAULA XML. With an increasing number of annotations of any kind, it becomes more and more urgent that annotations related to the same texts be added standoff. Standoff annotation consists in adding any kind of annotation in separate documents, which are ultimately linked to a main text, the so-called "base text," which is meant to be unchangeable. References occur via a graph-based system of IDs, which allows an annotation layer (contained in a separate file) to be linked to another annotation layer (contained in another separate file). All the annotations/files create a labeled directed acyclic graph, whose root is represented by the base text. Standoff annotation enables easy interoperability and extension, in that single annotation layers can reference other layers of annotation independently, thus overcoming the problem of conflicting hierarchies. Moreover, standoff annotation also allows addition of different annotations of the same kind to the same text (e.g., two different interpretations of the POS tag for a given token). In the present contribution, I show how the annotations of the AGLDT can become standoff using PAULA XML, which is an open access format following the LAF principles. More precisely, I show the case study of Caesar's De Bello Civili. I detail the PAULA XML files created for its tokenization and sentence split, which are preliminary required to add morphosyntactic annotation.}, urldate = {2023-04-26}, booktitle = {Proceedings of the 3rd {International} {Conference} on {Digital} {Access} to {Textual} {Cultural} {Heritage}}, publisher = {Association for Computing Machinery}, author = {Celano, Giuseppe G. A.}, year = {2019}, keywords = {Ancient Greek, Latin, PAULA XML, dependency treebank, standoff annotation}, pages = {149--153}, }
@incollection{forstall_lexical_2019, address = {Cham}, title = {Lexical {Matching}: {Text} {Reuse} as {Intertextuality}}, isbn = {978-3-030-23415-7}, shorttitle = {Lexical {Matching}}, url = {https://doi.org/10.1007/978-3-030-23415-7_3}, abstract = {Words are the basic unit for intertextual search in digital humanities. While published studies employ a variety of features depending on the textual practices with which they are concerned, the vast majority are combinations and transformations of word tokens. In this chapter, we introduce the idea of intertextual discovery through text-reuse detection, and present the core process common to most contemporary analyses.}, language = {en}, urldate = {2023-04-25}, booktitle = {Quantitative {Intertextuality}: {Analyzing} the {Markers} of {Information} {Reuse}}, publisher = {Springer International Publishing}, author = {Forstall, Christopher W. and Scheirer, Walter J.}, editor = {Forstall, Christopher W. and Scheirer, Walter J.}, year = {2019}, doi = {10.1007/978-3-030-23415-7_3}, pages = {55--78}, }
@book{forstall_quantitative_2019, address = {Cham}, title = {Quantitative {Intertextuality}: {Analyzing} the {Markers} of {Information} {Reuse}}, isbn = {978-3-030-23413-3 978-3-030-23415-7}, shorttitle = {Quantitative {Intertextuality}}, url = {http://link.springer.com/10.1007/978-3-030-23415-7}, language = {en}, urldate = {2023-04-25}, publisher = {Springer International Publishing}, author = {Forstall, Christopher W. and Scheirer, Walter J.}, year = {2019}, doi = {10.1007/978-3-030-23415-7}, keywords = {Authorship Attribution, Cultural Studies, Information Retrieval, Information Reuse, Pattern Matching}, }
@inproceedings{mambrini_linked_2019, address = {Paris, France}, title = {Linked {Open} {Treebanks}. {Interlinking} {Syntactically} {Annotated} {Corpora} in the {LiLa} {Knowledge} {Base} of {Linguistic} {Resources} for {Latin}}, abstract = {Paper about the procedure of inclusion of treebanks into the LiLa Knowledge Base of Linguistic Resources for Latin.}, booktitle = {Proceedings of the 18th {International} {Workshop} on {Treebanks} and {Linguistic} {Theories} ({TLT}, {SyntaxFest} 2019)}, author = {Mambrini, Francesco and Passarotti, Marco}, year = {2019}, keywords = {Latin, Linked Data, Treebank}, pages = {74--81}, }
@inproceedings{ochab_stylometry_2019, address = {New York, NY, USA}, series = {{DATeCH2019}}, title = {Stylometry of literary papyri}, isbn = {978-1-4503-7194-0}, url = {https://doi.org/10.1145/3322905.3322930}, doi = {10.1145/3322905.3322930}, abstract = {In this paper we present the first results of stylometric analysis of literary papyri. Specifically we perform a range of tests for unsupervised clustering of authors. We scrutinise both the best classic distance-based methods as well as the state-of-the-art network community detection techniques. We report on obstacles concerning highly non-uniform distributions of text size and authorial samples combined with sparse feature space. We also note how clustering performance depends on regularisation of spelling by means of querying relevant annotations.}, urldate = {2023-04-25}, booktitle = {Proceedings of the 3rd {International} {Conference} on {Digital} {Access} to {Textual} {Cultural} {Heritage}}, publisher = {Association for Computing Machinery}, author = {Ochab, Jeremi K. and Essler, Holger}, year = {2019}, keywords = {ancient Greek, authorship attribution, papyri, stylometry}, pages = {139--142}, }
@article{kim_survey_2018, title = {A survey on sentiment and emotion analysis for computational literary studies}, journal = {arXiv preprint arXiv:1808.03137}, author = {Kim, Evgeny and Klinger, Roman}, year = {2018}, }
@book{bengfort_applied_2018, address = {Boston}, title = {Applied {Text} {Analysis} with {Python}: {Enabling} {Language}-{Aware} {Data} {Products} with {Machine} {Learning}.}, shorttitle = {Applied {Text} {Analysis} with {Python}}, url = {https://pdfroom.com/books/applied-text-analysis-with-python-enabling-language-aware-data-products-with-machine-learning/ra5179J6gJO}, abstract = {Applied Text Analysis with Python: Enabling Language-Aware Data Products with Machine Learning}, language = {en}, urldate = {2023-12-31}, publisher = {O'Reilly}, author = {Bengfort, Benjamin and Ojeda, Tony and Bilbro, Rebecca}, year = {2018}, }
@article{eckhoff_proiel_2018, title = {The {PROIEL} treebank family: a standard for early attestations of {Indo}-{European} languages}, volume = {52}, doi = {10.1007/s10579-017-9388-5}, journal = {Language Resources and Evaluation}, author = {Eckhoff, H. and Bech, K. and Bouma, G.}, year = {2018}, pages = {29--65}, }
@article{ponti_non-configurationality_2018, title = {Non-{Configurationality} in {Diachrony}: {Correlations} in {Local} and {Global} {Networks} of {Ancient} {Greek} and {Latin}}, volume = {35}, doi = {10.1075/dia.00007.pon}, number = {3}, journal = {Diachronica}, author = {Ponti, Edoardo Maria and Luraghi, Silvia}, year = {2018}, pages = {367--392}, }
@inproceedings{cecchini_challenges_2018, address = {Bruxelles, Belgium}, title = {Challenges in {Converting} the {Index} {Thomisticus} {Treebank} into {Universal} {Dependencies}}, url = {https://www.aclweb.org/anthology/W18-6004.pdf}, booktitle = {Proceedings of the {Second} {Workshop} on {Universal} {Dependencies} ({UDW} 2018) at {EMNLP} 2018}, publisher = {Association for Computational Linguistics}, author = {Cecchini, F. M. and Passarotti, M. and Marongiu, P. and Zeman, D.}, year = {2018}, pages = {27--36}, }
@incollection{berti_annotating_2018, address = {Zürich/New York}, title = {Annotating {Text} {Reuse} within the {Context}: {The} {Leipzig} {Open} {Fragmentary} {Texts} {Series} ({LOFTS})}, language = {English}, booktitle = {Text, {Kontext}, {Kontextualisierung}. {Moderne} {Kontextkonzepte} und antike {Literatur}}, author = {Berti, Monica}, editor = {Forst, Alexandra and Gärtner, Ursula and Tischer, Ute}, year = {2018}, pages = {223--234}, }
@inproceedings{monachini_digital_2018, title = {Digital {Classics}: {A} {Survey} on the {Needs} of {Ancient} {Greek} {Scholars} in {Italy}}, url = {https://www.clarin.eu/sites/default/files/Monachini-Nicolosi-Stefanini-CLARIN2017_paper_3.pdf}, abstract = {This paper presents and discusses the findings of a survey carried out in order to assess the use of digital resources and digital technologies with respect to work in Ancient Greek scholarship, as well as to identify the factors that are likely to constrain its use and to elicit needs and requirements of Ancient Greek scholars in Italy. The survey is in line with the principles behind the recent user engagement strategy developed by CLARIN-ERIC and constitutes one of the national efforts undertaken by CLARIN-IT to contribute to the wider impact of CLARIN on Digital Classicists.}, language = {en}, booktitle = {Proceedings of the {CLARIN} 2017 {Conference}}, publisher = {Linköping University Electronic Press}, author = {Monachini, Monica and Nicolosi, Anika and Stefanini, Alberto}, year = {2018}, keywords = {reference}, pages = {1--5}, }
@phdthesis{nury_automated_2018, title = {Automated {Collation} and {Digital} {Editions}: from {Theory} to {Practice}}, shorttitle = {Automated {Collation} and {Digital} {Editions}}, url = {https://hal.science/tel-02493805}, abstract = {The purpose of the dissertation is to investigate from a theoretical and methodological perspective the different tools that allow automated collation, and study the application of such tools to the creation of a digital critical edition in the context of Classical literature. By doing so, the dissertation examines many foundational but often neglected components of the philological method, such as the definition and wider implication of transcription, reading, and variant. The goal is to provide a reflection on automated collation and the theoretical as well as practical challenges it poses: what is automated collation? How is it performed, and what are the main differences with manual collation? What are the benefits of automated collation? Why has it not been widely adopted yet, despite the fact that it was developed to help scholars? How to process the results of collation programmes? As a case study, a Classical Latin text has been used to test automated collation and to compare the various existing tools.}, language = {en}, urldate = {2023-08-26}, school = {King's College London}, author = {Nury, Elisa}, month = jul, year = {2018}, }
@inproceedings{lana_eliciting_2018, address = {Cham}, series = {Communications in {Computer} and {Information} {Science}}, title = {Eliciting the {Ancient} {Geography} from a {Digital} {Library} of {Latin} {Texts}}, isbn = {978-3-319-73165-0}, doi = {10.1007/978-3-319-73165-0_19}, abstract = {Geolat – Geography for Latin Literature is a research project, aimed at making accessible a digital library containing the works of Latin literature (from its origins in 240 BCE to the end of the Roman Empire in 476 CE) through a query interface of geographic/cartographic type representing the geographic knowledge expressed in the Latin texts themselves. A core activity of the project has been the development of the ontology GO!, which describes the geographical knowledge contained in the texts of the library. The ontologically annotated texts will allow for a variety of scientifically relevant uses, apart from the geo-based browsing: for example the production of digital and printed critical editions. The project is under development at Dipartimento di Studi Umanistici of Università del Piemonte Orientale, and financially supported by Fondazione Compagnia di San Paolo.}, language = {en}, booktitle = {Digital {Libraries} and {Multimedia} {Archives}}, publisher = {Springer International Publishing}, author = {Lana, Maurizio and Tambassi, Timothy}, editor = {Serra, Giuseppe and Tasso, Carlo}, year = {2018}, keywords = {Classical latin texts, Digital library, Geography, OWL, Ontology, Web}, pages = {191--200}, }
@article{zhang_did_2018, title = {Did {Gaius} {Julius} {Caesar} {Write} {De} {Bello} {Hispaniensi}? {A} {Computational} {Study} of {Latin} {Classics} {Authorship}}, volume = {14}, copyright = {Copyright (c) 2018 Human IT: Journal for Information Technology Studies as a Human Science}, issn = {1402-151X}, shorttitle = {Did {Gaius} {Julius} {Caesar} {Write} {De} {Bello} {Hispaniensi}?}, url = {https://humanit.hb.se/article/view/515}, abstract = {This project addresses a two-millennium old mystery surrounding the authorship of ancient Latin war memoirs attributed to Caesar, using Distributional Semantics, a modern computational method for detecting written text patterns. The Civil War has been confirmed to be Caesar’s work, as well as the first seven of the eight chapters of the Gallic War, the eighth by Hirtius. The authorship of the African, Alexandrine, and Spanish Wars, though attributed to Caesar, is still under debate. Methods of distributional semantics derive representations of words from their distribution across a large amount of text, such that words that occur in similar contexts have similar representations. These representations can then be combined to model larger units of text, such as chapters and whole books. SemanticVectors software was used to calculate the similarity between chapters or books after dimension reduction using Random Indexing. The results show that the Gallic War’s eighth chapter is significantly different from its other seven chapters and from the Civil War, verifying the ability of distributional semantics to detect different Latin authorships. The African, Alexandrine, and Spanish Wars are notably different from the Civil War and Gallic War (first seven chapters), suggesting that Caesar did not write these three. Furthermore, the African, Alexandrine, and Spanish Wars are different from each other and from the Civil and Gallic Wars, suggesting that they were written by different authors. This project demonstrates the value of distributional semantics in classics research. Its implications for digital humanities and real world problems such as plagiarism are discussed.}, language = {en}, number = {1}, urldate = {2023-08-26}, journal = {Human IT: Journal for Information Technology Studies as a Human Science}, author = {Zhang, Olivia R. and Cohen, Trevor and McGill, Scott}, month = jun, year = {2018}, note = {Number: 1}, keywords = {Caesar, Classics, Latin, authorship attribution, computational linguistics, distributional semantics}, pages = {28--58}, }
@article{vierros_preprocessing_2017, title = {Preprocessing {Greek} {Papyri} for {Linguistic} {Annotation}}, doi = {10.46298/jdmdh.1385}, number = {Numéro spécial sur le traitement assisté par ordinateur de l’intertextualité dans les langues anciennes}, journal = {Journal of Data Mining \& Digital Humanities}, author = {Vierros, Marja and Henriksson, Erik}, year = {2017}, }
@book{mistrik_software_2017, title = {Software {Architecture} for {Big} {Data} and the {Cloud}}, isbn = {978-0-12-809338-2}, url = {https://books.google.de/books?hl=de&lr=&id=zvPtDQAAQBAJ}, abstract = {Software Architecture for Big Data and the Cloud is designed to be a single resource that brings together research on how software architectures can solve the challenges imposed by building big data software systems. The challenges of big data on the software architecture can relate to scale, security, integrity, performance, concurrency, parallelism, and dependability, amongst others. Big data handling requires rethinking architectural solutions to meet functional and non-functional requirements related to volume, variety and velocity. The book's editors have varied and complementary backgrounds in requirements and architecture, specifically in software architectures for cloud and big data, as well as expertise in software engineering for cloud and big data. This book brings together work across different disciplines in software engineering, including work expanded from conference tracks and workshops led by the editors. Discusses systematic and disciplined approaches to building software architectures for cloud and big data with state-of-the-art methods and techniques Presents case studies involving enterprise, business, and government service deployment of big data applications Shares guidance on theory, frameworks, methodologies, and architecture for cloud and big data}, language = {en}, publisher = {Morgan Kaufmann}, editor = {Mistrik, Ivan and Bahsoon, Rami and Ali, Nour and Heisel, Maritta and Maxim, Bruce}, month = jun, year = {2017}, keywords = {Computers / Software Development \& Engineering / General}, }
@article{bodard_standards_2017, title = {Standards for {Networking} {Ancient} {Person} data: {Digital} approaches to problems in prosopographical space}, copyright = {Copyright (c) 2017}, issn = {2364-7957}, shorttitle = {Standards for {Networking} {Ancient} {Person} data}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/37975}, doi = {10.11588/dco.2017.0.37975}, abstract = {Prosopographies disambiguate names appearing in sources by creating lists of persons, but the progress of scholarship now makes these lists difficult to maintain. In a digital context unique stable identifiers can be reshuffled ad libitum when searching and ordering information. Digital data increasingly brings together complementary research outputs: the Standards for Networking Ancient Prosopographies project takes on the challenge of creating an aggregated resource, adopting a Linked Open Data approach. In this paper we shall present three case studies highlighting the promise and problems of encoding unambiguous identities, titulature and other disambiguating information, and treating divine figures as person-data, respectively. Digital approaches are tools for research, assisting rather than replacing the historian, who remains central to the research endeavor.}, language = {en}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Bodard, Gabriel and Cayless, Hugh and Depauw, Mark and Isaksen, Leif and Lawrence, Faith and Rahtz, Sebastian}, month = nov, year = {2017}, pages = {28--43}, }
@article{pockelmann_paraphrasensuche_2017, title = {Paraphrasensuche mittels word2vec und der {Word} {Mover}’s {Distance} im {Altgriechischen}}, copyright = {Copyright (c) 2017 Digital Classics Online}, issn = {2364-7957}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/40185}, doi = {10.11588/dco.2017.0.40185}, abstract = {To find receptions of Plato‘s work within the ancient Greek literature, automatic methods would be a useful assistance. Unfortunately, such methods are often knowledge-based and thus restricted to extensively annotated texts, which are not available to a sufficient extent for ancient Greek. In this article, we describe an approach that is based on the distributional hypotheses instead, to overcome the problem of missing annotations. This approach uses word2vec and the related Word Mover‘s Distance to determine phrases with similar meaning. Despite its experimental state, the method produces some meaningful results as shown in three examples.}, language = {de}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Pöckelmann, Marcus and Ritter, Jörg and Wöckener-Gade, Eva and Schubert, Charlotte}, month = dec, year = {2017}, keywords = {Word Mover’s Distance}, pages = {24--36}, }
@article{chaudhuri_bioinformatics_2017, title = {Bioinformatics and {Classical} {Literary} {Study}}, volume = {Numéro spécial sur le traitement assisté par ordinateur de l‘intertextualité dans les langues anciennes}, issn = {2416-5999}, url = {https://jdmdh.episciences.org/3807}, doi = {10.46298/jdmdh.1386}, abstract = {This paper describes the Quantitative Criticism Lab, a collaborative initiative between classicists, quantitative biologists, and computer scientists to apply ideas and methods drawn from the sciences to the study of literature. A core goal of the project is the use of computational biology, natural language processing, and machine learning techniques to investigate authorial style, intertextuality, and related phenomena of literary significance. As a case study in our approach, here we review the use of sequence alignment, a common technique in genomics and computational linguistics, to detect intertextuality in Latin literature. Sequence alignment is distinguished by its ability to find inexact verbal similarities, which makes it ideal for identifying phonetic echoes in large corpora of Latin texts. Although especially suited to Latin, sequence alignment in principle can be extended to many other languages.}, number = {Project presentations}, urldate = {2023-08-26}, journal = {Journal of Data Mining \& Digital Humanities}, author = {Chaudhuri, Pramit and Dexter, Joseph P.}, month = aug, year = {2017}, note = {Publisher: Episciences.org}, }
@article{dexter_quantitative_2017, title = {Quantitative criticism of literary relationships}, volume = {114}, url = {https://www.pnas.org/doi/abs/10.1073/pnas.1611910114}, doi = {10.1073/pnas.1611910114}, abstract = {Authors often convey meaning by referring to or imitating prior works of literature, a process that creates complex networks of literary relationships (“intertextuality”) and contributes to cultural evolution. In this paper, we use techniques from stylometry and machine learning to address subjective literary critical questions about Latin literature, a corpus marked by an extraordinary concentration of intertextuality. Our work, which we term “quantitative criticism,” focuses on case studies involving two influential Roman authors, the playwright Seneca and the historian Livy. We find that four plays related to but distinct from Seneca’s main writings are differentiated from the rest of the corpus by subtle but important stylistic features. We offer literary interpretations of the significance of these anomalies, providing quantitative data in support of hypotheses about the use of unusual formal features and the interplay between sound and meaning. The second part of the paper describes a machine-learning approach to the identification and analysis of citational material that Livy loosely appropriated from earlier sources. We extend our approach to map the stylistic topography of Latin prose, identifying the writings of Caesar and his near-contemporary Livy as an inflection point in the development of Latin prose style. In total, our results reflect the integration of computational and humanistic methods to investigate a diverse range of literary questions.}, number = {16}, urldate = {2023-08-26}, journal = {Proceedings of the National Academy of Sciences}, author = {Dexter, Joseph P. and Katz, Theodore and Tripuraneni, Nilesh and Dasgupta, Tathagata and Kannan, Ajay and Brofos, James A. and Bonilla Lopez, Jorge A. and Schroeder, Lea A. and Casarez, Adriana and Rabinovich, Maxim and Haimson Lushkov, Ayelet and Chaudhuri, Pramit}, month = apr, year = {2017}, note = {Publisher: Proceedings of the National Academy of Sciences}, pages = {E3195--E3204}, }
@article{almas_perseids_2017, title = {Perseids: {Experimenting} with {Infrastructure} for {Creating} and {Sharing} {Research} {Data} in the {Digital} {Humanities}}, volume = {16}, number = {19}, journal = {Data Science Journal}, author = {Almas, Bridget}, year = {2017}, pages = {1--17}, }
@inproceedings{erdmann_challenges_2016, address = {Osaka, Japan}, title = {Challenges and {Solutions} for {Latin} {Named} {Entity} {Recognition}}, abstract = {Although spanning thousands of years and genres as diverse as liturgy, historiography, lyric and other forms of prose and poetry, the body of Latin texts is still relatively sparse compared to English. Data sparsity in Latin presents a number of challenges for traditional Named Entity Recognition techniques. Solving such challenges and enabling reliable Named Entity Recognition in Latin texts can facilitate many down-stream applications, from machine translation to digital historiography, enabling Classicists, historians, and archaeologists for instance, to track the relationships of historical persons, places, and groups on a large scale. This paper presents the first annotated corpus for evaluating Named Entity Recognition in Latin, as well as a fully supervised model that achieves over 90\% F-score on a held-out test set, significantly outperforming a competitive baseline. We also present a novel active learning strategy that predicts how many and which sentences need to be annotated for named entities in order to attain a specified degree of accuracy when recognizing named entities automatically in a given text. This maximizes the productivity of annotators while simultaneously controlling quality.}, booktitle = {Proceedings of the {Workshop} on {Language} {Technology} {Resources} and {Tools} for {Digital} {Humanities} ({LT4DH})}, publisher = {The COLING 2016 Organizing Committee.}, author = {Erdmann, Alexander and Brown, Christopher and Joseph, Brian and Janse, Mark and Ajaka, Petra and Elsner, Micha and De Marneffe, Marie-Catherine}, year = {2016}, pages = {85--93}, }
@article{springmann_latmor_2016, title = {{LatMor}: {A} {Latin} {Finite}-{State} {Morphology} {Encoding} {Vowel} {Quantity}}, volume = {2}, doi = {10.1515/opli-2016-0019}, number = {1}, journal = {Open Linguistics}, author = {Springmann, Uwe and Schmid, Helmut and Najock, Dietmar}, year = {2016}, pages = {386--392}, }
@book{korkiakangas_subject_2016, address = {Helsinki}, series = {Commentationes {Humanarum} {Litterarum}}, title = {Subject {Case} in the {Latin} of {Tuscan} {Charters} of the 8th and 9th {Centuries}}, volume = {133}, publisher = {Societas Scientiarum Fennica}, author = {Korkiakangas, Timo}, year = {2016}, }
@incollection{mahony_open_2016, title = {Open {Education} and {Open} {Educational} {Resources} for the {Teaching} of {Classics} in the {UK}}, url = {https://www.ubiquitypress.com/site/books/10.5334/bat/}, abstract = {Edited by organizers of “Digital Classicist” seminars in London and Berlin, this volume addresses the impact of computational approaches to the study of antiquity on audiences other than the scholars who conventionally publish it. In addition to colleagues in classics and digital humanities, the eleven chapters herein concern and are addressed to students, heritage professionals and “citizen scientists”. Each chapter is a scholarly contribution, presenting research questions in the classics, digital humanities or, in many cases, both. They are all also examples of work within one of the most important areas of academia today: scholarly research and outputs that engage with collaborators and audiences not only including our colleagues, but also students, academics in different fields including the hard sciences, professionals and the broader public. Collaboration and scholarly interaction, particularly with better-funded and more technically advanced disciplines, is essential to digital humanities and perhaps even more so to digital classics. The international perspectives on these issues are especially valuable in an increasingly connected, institutionally and administratively diverse world. This book addresses the broad range of issues scholars and practitioners face in engaging with students, professionals and the public, in accessible and valuable chapters from authors of many backgrounds and areas of expertise, including language and linguistics, history, archaeology and architecture. This collection will be of interest to teachers, scientists, cultural heritage professionals, linguists and enthusiasts of history and antiquity.}, language = {en}, urldate = {2020-02-04}, booktitle = {Digital {Classics} {Outside} the {Echo}-{Chamber}}, publisher = {Ubiquity Press}, author = {Mahony, Simon}, editor = {Romanello, Matteo and Bodard, Gabriel}, month = apr, year = {2016}, doi = {10.5334/bat}, keywords = {reference}, pages = {33--50}, }
@inproceedings{nivre_universal_2016, address = {Portorož, Slovenia}, title = {{Universal} {Dependencies} v1: {A} {Multilingual} {Treebank} {Collection}}, url = {https://www.aclweb.org/anthology/L16-1262.pdf}, booktitle = {Proceedings of the {Tenth} {International} {Conference} on {Language} {Resources} and {Evaluation} ({LREC} 2016)}, publisher = {European Language Resources Association (ELRA)}, author = {Nivre, Joakim and De Marneffe, Marie-Catherine and Ginter, Filip and Goldberg, Yoav and Hajic, Jan and Manning, Christopher D and McDonald, Ryan and Petrov, Slav and Pyysalo, Sampo and Silveira, Natalia}, year = {2016}, pages = {1659--1666}, }
@article{kirby_computational_2016, title = {A {Computational} {Method} for {Comparative} {Greek} and {Latin} {Prosimetrics}}, language = {en}, author = {Kirby, Joseph Tyler}, year = {2016}, }
@article{stover_reassessing_2016, title = {Reassessing the {Apuleian} {Corpus}: {A} {Computational} {Approach} to {Authenticity}}, volume = {66}, issn = {0009-8388, 1471-6844}, shorttitle = {Reassessing the {Apuleian} {Corpus}}, url = {https://www.cambridge.org/core/journals/classical-quarterly/article/abs/reassessing-the-apuleian-corpus-a-computational-approach-to-authenticity/6BFD216C8AC552673B16BBA0FA40FF54}, doi = {10.1017/S0009838816000768}, abstract = {The renaissance of Apuleian studies of the past few decades shows no signs of abating. The summer of 2014 may well be the highest watermark yet recorded in the tide of interest in Apuleius: June and July alone saw the release of two monographs, one each from Oxford University Press and Cambridge University Press, and one edited conference volume, from Routledge. The clearest sign that the sophist of Madauros has come into his own is his admission into the exclusive club of the Oxford Classical Texts: the first volume of his complete works containing the Metamorphoses edited by Maaike Zimmerman came out in 2012. One of the most salutary effects of this renewed interest has been the reappraisal of the ‘whole Apuleius’: Apuleius has more to offer than just the Metamorphoses, and recent scholarship on the rhetorica and the philosophica have shown not only how these opera minora can help us understand the opus maius, but also how they are important and interesting documents in their own right.}, language = {en}, number = {2}, urldate = {2023-08-26}, journal = {The Classical Quarterly}, author = {Stover, Justin and Kestemont, Mike}, month = dec, year = {2016}, pages = {645--672}, }
@inproceedings{elsner_automatic_2016, address = {Berlin, Germany}, title = {Automatic discovery of {Latin} syntactic changes}, url = {http://aclweb.org/anthology/W16-2120}, doi = {10.18653/v1/W16-2120}, abstract = {Syntactic change tends to affect constructions, but treebanks annotate lower-level structure: PCFG rules or dependency arcs. This paper extends prior work in native language identification, using Tree Substitution Grammars to discover constructions which can be tested for historical variability. In a case study comparing Classical and Medieval Latin, the system discovers several constructions corresponding to known historical differences, and learns to distinguish the two varieties with high accuracy. Applied to an intermediate text (the Vulgate Bible), it indicates which changes between the eras were already occurring at this earlier stage.}, language = {en}, urldate = {2023-08-26}, booktitle = {Proceedings of the 10th {SIGHUM} {Workshop} on {Language} {Technology} for {Cultural} {Heritage}, {Social} {Sciences}, and {Humanities}}, publisher = {Association for Computational Linguistics}, author = {Elsner, Micha and Lane, Emily}, year = {2016}, pages = {156--164}, }
@article{field_automated_2016, title = {An {Automated} {Approach} to {Syntax}-based {Analysis} of {Classical} {Latin}}, copyright = {Copyright (c) 2016 Anjalie Field}, issn = {2364-7957}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/32315}, doi = {10.11588/dco.2016.0.32315}, abstract = {The goal of this study is to present an automated method for analyzing the style of Latin authors. Many of the common automated methods in stylistic analysis are based on lexical measures, which do not work well with Latin because of the language’s high degree of inflection and free word order. In contrast, this study focuses on analysis at a syntax level by examining two constructions, the ablative absolute and the cum clause. These constructions are often interchangeable, which suggests an author’s choice of construction is typically more stylistic than functional. We first identified these constructions in hand-annotated texts. Next we developed a method for identifying the constructions in unannotated texts, using probabilistic morphological tagging. Our methods identified constructions with enough accuracy to distinguish among different genres and different authors. In particular, we were able to determine which book of Caesar’s Commentarii de Bello Gallico was not written by Caesar. Furthermore, the usage of ablative absolutes and cum clauses observed in this study is consistent with the usage scholars have observed when analyzing these texts by hand. The proposed methods for an automatic syntax-based analysis are shown to be valuable for the study of classical literature.}, language = {en}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Field, Anjalie}, month = dec, year = {2016}, pages = {57--78}, }
@article{kestemont_authenticating_2016, title = {Authenticating the writings of {Julius} {Caesar}}, volume = {63}, issn = {0957-4174}, url = {https://linkinghub.elsevier.com/retrieve/pii/S0957417416303116}, doi = {10.1016/j.eswa.2016.06.029}, language = {en}, urldate = {2023-07-23}, journal = {Expert Systems with Applications}, author = {Kestemont, Mike and Stover, Justin and Koppel, Moshe and Karsdorp, Folgert and Daelemans, Walter}, month = nov, year = {2016}, pages = {86--96}, }
@book{speich_algorithms_2016, address = {Düsseldorf}, title = {Algorithms and {Aristotle}}, isbn = {978-3-945627-13-6}, abstract = {A glossary of key terms on digitalization and its effects on the labour market and education can be found in alphabetical order throughout the book}, publisher = {Vodafone Foundation Germany}, editor = {Speich, M.}, year = {2016}, }
@article{stover_authorship_2016, title = {The {Authorship} of the "{Historia} {Augusta}": {Two} {New} {Computational} {Studies}}, volume = {59}, issn = {0076-0730}, shorttitle = {The {Authorship} of the "{Historia} {Augusta}"}, url = {https://www.jstor.org/stable/44254158}, abstract = {The case of the Historia Augusta, a collection of imperial biographies from Hadrian to Carus supposedly written by six different authors, provided the impetus for the introduction of computational methods into the Echtheitskritik of ancient authors in 1979. After a flurry of studies in the 1990s, interest waned, particularly because most of those studies seemed to support conclusions incompatible with the scholarly consensus on the question. In the paper, we approach this question with the new tool of authorship verification – one of the most promising approaches in forensic stylometry today – as well as the established method of principal components analysis to demonstrate that there is no simple alternative between single and multiple authorship, and that the results of a computational analysis are in fact compatible with the results obtained from historical, literary, and philological analysis.}, number = {2}, urldate = {2023-07-23}, journal = {Bulletin of the Institute of Classical Studies}, author = {Stover, Justin A. and Kestemont, Mike}, year = {2016}, pages = {140--157}, }
@inproceedings{straka_udpipe_2016, title = {{UDPipe}: {Trainable} {Pipeline} for {Processing} {CoNLL}-{U} {Files} {Performing} {Tokenization}, {Morphological} {Analysis}, {POS} {Tagging} and {Parsing}}, booktitle = {Proceedings of the {Tenth} {International} {Conference} on {Language} {Resources} and {Evaluation} ({LREC} 2016)}, author = {Straka, Milan and Hajic, Jan and Straková, Jana}, year = {2016}, pages = {4290--4297}, }
@article{eckhoff_linguistics_2015, title = {Linguistics vs. digital editions: {The} {Tromsø} {Old} {Russian} and {OCS} {Treebank}}, url = {https://munin.uit.no/bitstream/handle/10037/22366/article.pdf?sequence=2&isAllowed=y}, journal = {Scripta \& e-Scripta}, author = {Eckhoff, Hanne Martine and Berdičevskis, Aleksandrs}, year = {2015}, pages = {9--25}, }
@article{fusi_multilanguage_2015, title = {A {Multilanguage}, {Modular} {Framework} for {Metrical} {Analysis}: {IT} {Patterns} and {Theorical} {Issues}}, volume = {199}, doi = {10.3917/lang.199.0041}, journal = {Langages}, author = {Fusi, Daniele}, year = {2015}, pages = {41--66}, }
@article{de_felice_classes_2015, title = {{CLaSSES}: {A} {New} {Digital} {Resource} for {Latin} {Epigraphy}}, volume = {1}, doi = {10.1418/90427}, number = {1}, journal = {Italian Journal of Computational Linguistics}, author = {De Felice, Irene and Donati, Margherita and Marotta, Giovanna}, year = {2015}, pages = {125--136}, }
@article{mcgillivray_computational_2015, title = {Computational {Valency} {Lexica} for {Latin} and {Greek} in {Use}: {A} {Case} {Study} of {Syntactic} {Ambiguity}}, volume = {14}, doi = {10.1515/joll-2015-0005}, number = {1}, journal = {Journal of Latin Linguistics}, author = {McGillivray, Barbara and Vatri, Alessandro}, year = {2015}, pages = {101--126}, }
@inproceedings{bjerva_word_2015, address = {Beijing, China}, title = {Word {Embeddings} {Pointing} the {Way} for {Late} {Antiquity}}, url = {https://www.aclweb.org/anthology/W15-3708}, doi = {10.18653/v1/W15-3708}, urldate = {2021-04-24}, booktitle = {Proceedings of the 9th {SIGHUM} {Workshop} on {Language} {Technology} for {Cultural} {Heritage}, {Social} {Sciences}, and {Humanities} ({LaTeCH})}, publisher = {Association for Computational Linguistics}, author = {Bjerva, Johannes and Praet, Raf}, month = jul, year = {2015}, pages = {53--57}, }
@book{richards_software_2015, title = {Software {Architecture} {Patterns}}, url = {http://103.62.146.201:8081/xmlui/handle/1/5665}, abstract = {It’s all too common for developers to start coding an application without a formal architecture in place. Without a clear and well-defined architecture, most developers and architects will resort to}, language = {en}, urldate = {2023-08-31}, publisher = {O'Reilly Media}, author = {Richards, Mark}, year = {2015}, }
@article{simon_linking_2015, title = {Linking early geospatial documents, one place at a time: annotation of geographic documents with {Recogito}}, volume = {10}, issn = {1790-3769}, shorttitle = {Linking early geospatial documents, one place at a time}, url = {http://www.e-perimetron.org/Vol_10_2/Simon_et_al.pdf}, abstract = {Recogito is an open source tool for the semi-automatic annotation of place references in maps and texts. It was developed as part of the Pelagios 3 research project, which aims to build up a comprehensive directory of places referred to in early maps and geographic writing predating the year 1492. Pelagios 3 focuses specifically on sources from the Classical Latin, Greek and Byzantine periods; on Mappae Mundi and narrative texts from the European Medieval period; on Late Medieval Portolans; and on maps and texts from the early Islamic and early Chinese traditions. Since the start of the project in September 2013, the team has harvested more than 120,000 toponyms, manually verifying almost 60,000 of them. Furthermore, the team held two public annotation workshops supported through the Open Humanities Awards 2014. In these workshops, a mixed audience of students and academics of different backgrounds used Recogito to add several thousand contributions on each workshop day. A number of benefits arise out of this work: on the one hand, the digital identification of places – and the names used for them – makes the documents' contents amenable to information retrieval technology, i.e. documents become more easily search- and discoverable to users than through conventional metadata-based search alone. On the other hand, the documents are opened up to new forms of re-use. For example, it becomes possible to “map” and compare the narrative of texts, and the contents of maps with modern day tools like Web maps and GIS; or to analyze and contrast documents’ geographic properties, toponymy and spatial relationships. Seen in a wider context, we argue that initiatives such as ours contribute to the growing ecosystem of the “Graph of Humanities Data” that is gathering pace in the Digital Humanities (linking data about people, places, events, canonical references, etc.), which has the potential to open up new avenues for computational and quantitative research in a variety of fields including History, Geography, Archaeology, Classics, Genealogy and Modern Languages.}, language = {en}, number = {2}, urldate = {2023-08-26}, journal = {e-Perimetron}, author = {Simon, Rainer and Barker, Elton and Isaksen, Leif and de Soto Cañamares, Pau}, year = {2015}, pages = {49--59}, }
@article{revellio_classics_2015, title = {Classics and the {Digital} {Age}. {Advantages} and limitations of digital text analysis in classical philology}, issn = {2364-5342}, url = {https://kops.uni-konstanz.de/server/api/core/bitstreams/620defc4-effd-4224-bfb6-782e20748e01/content}, abstract = {Die Klassische Philologie nahm computergestützte Methoden der Textanalyse früh als Chance wahr. Um einen Einblick in die jüngsten Entwicklungen der digitalen Textanalyse im Bereich der Latinistik zu geben wird eine Auswahl bestehender Textdatenbanken wie gängiger Analysetools vorgestellt, wobei insbesondere auf das Phänomen der Intertextualität als Untersuchungsfeld fokussiert wird. Zudem werden unmittelbar verknüpfte Themen wie die Digitalisierung und langfristige Erhaltung antiker Texte, der Status unterschiedlicher Text-Surrogate sowie die Notwendigkeit fremdsprachlicher Kenntnisse diskutiert.}, urldate = {2023-08-26}, author = {Revellio, Marie}, year = {2015}, pages = {1--16}, }
@book{blackwell_canonical_2015, title = {The {Canonical} {Text} {Services} {URN} {Specification}, {Version} 2.0.rc.1 [{CITE} / {URN}]}, author = {Blackwell, Christopher and Smith, Neel}, year = {2015}, }
@article{coffee_tesserae_2014, title = {Tesserae: {A} {Search} {Engine} for {Allusion}}, shorttitle = {Tesserae}, url = {https://hcommons.org/deposits/item/hc:12221/}, abstract = {The Tesserae Project is an interdisciplinary research effort employing computational methods to detect and analyze literary allusion (a form of text reuse) currently focusing on Latin and ancient Greek. The Project seeks funding to create a fully-functional, publicly available tool to detect similar phrases in two texts at rates that approach those of literary commentators. To this end, funding will support adding sensitivity to word meaning, phrase context, and sound similarity. Detection rate improvements will be measured against a set of 3000 parallel phrases previously graded for literary significance. A revised website will inform researchers of research results and new functions of the tool. The project team will give presentations and produce publications explaining the function, results, and theoretical consequences of the fully operational tool. This work is preliminary to an out-year Implementation Phase that will see the addition of English, French, Italian, and Spanish.}, language = {en-US}, urldate = {2023-08-26}, author = {Coffee, Neil and Koenig, Jean-Pierre}, month = may, year = {2014}, }
@inproceedings{passarotti_syntax_2014, address = {Gothenburg, Sweden}, title = {From {Syntax} to {Semantics}. {First} {Steps} {Towards} {Tectogrammatical} {Annotation} of {Latin}}, url = {http://aclweb.org/anthology/W14-0615}, doi = {10.3115/v1/W14-0615}, abstract = {Assuming that collaboration between theoretical and computational linguistics is essential in projects aimed at developing language resources like annotated corpora, this paper presents the first steps of the semantic annotation of the Index Thomisticus Treebank, a dependency-based treebank of Medieval Latin. The semantic layer of annotation of the treebank is detailed and the theoretical framework supporting the annotation style is explained and motivated.}, language = {en}, urldate = {2023-08-26}, booktitle = {Proceedings of the 8th {Workshop} on {Language} {Technology} for {Cultural} {Heritage}, {Social} {Sciences}, and {Humanities} ({LaTeCH})}, publisher = {Association for Computational Linguistics}, author = {Passarotti, Marco}, year = {2014}, pages = {100--109}, }
@article{almas_linked_2014, title = {Linked {Data} in the {Perseus} {Digital} {Library}}, volume = {7}, url = {http://dlib.nyu.edu/awdl/isaw/isaw-papers/7/almas-babeu-krohn/}, number = {3}, journal = {ISAW Papers}, author = {Almas, Bridget and Babeu, Alison and Krohn, Anna}, year = {2014}, }
@article{koppel_determining_2014, title = {Determining if two documents are written by the same author}, volume = {65}, issn = {2330-1643}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.22954}, doi = {10.1002/asi.22954}, abstract = {Almost any conceivable authorship attribution problem can be reduced to one fundamental problem: whether a pair of (possibly short) documents were written by the same author. In this article, we offer an (almost) unsupervised method for solving this problem with surprisingly high accuracy. The main idea is to use repeated feature subsampling methods to determine if one document of the pair allows us to select the other from among a background set of “impostors” in a sufficiently robust manner.}, language = {en}, number = {1}, urldate = {2023-07-23}, journal = {Journal of the Association for Information Science and Technology}, author = {Koppel, Moshe and Winter, Yaron}, year = {2014}, keywords = {automatic classification, machine learning, natural language processing}, pages = {178--187}, }
@inproceedings{tiepmar_new_2014, address = {Gothenburg, Sweden}, title = {A new implementation for canonical text services [{CTS}]}, isbn = {978-1-63266-401-3}, booktitle = {Proceedings of the 8th {Workshop} on {Language} {Technology} for {Cultural} {Heritage}, {Social} {Sciences}, and {Humanities} ({LaTeCH})}, publisher = {Association for Computational Linguistics}, author = {Tiepmar, Jochen and Teichmann, Christoph and Heyer, Gerhard and Berti, Monica and Crane, Gregory}, year = {2014}, pages = {1--8}, }
@article{eckhoff_mapping_2013, title = {Mapping out the {Source} {Domain}: {Evidence} from {Parallel} {Old} {Indo}-{European} {Data}}, volume = {37}, doi = {10.1075/sl.37.2.03eck}, number = {2}, journal = {Studies in Language}, author = {Eckhoff, Hanne Martine and Thomason, Olga A. and de Swart, Peter}, year = {2013}, pages = {302--355}, }
@book{moretti_distant_2013, title = {Distant {Reading}}, isbn = {978-1-78168-333-0}, url = {https://books.google.de/books?hl=en&lr=&id=Wo4110IdRLMC}, abstract = {WINNER OF THE NATIONAL BOOK CRITICS CIRCLE AWARD How does a literary historian end up thinking in terms of z-scores, principal component analysis, and clustering coefficients? The essays in Distant Reading led to a new and often contested paradigm of literary analysis. In presenting them here Franco Moretti reconstructs his intellectual trajectory, the theoretical influences over his work, and explores the polemics that have often developed around his positions. From the evolutionary model of “Modern European Literature,” through the geo-cultural insights of “Conjectures of World Literature” and “Planet Hollywood,” to the quantitative findings of “Style, inc.” and the abstract patterns of “Network Theory, Plot Analysis,” the book follows two decades of conceptual development, organizing them around the metaphor of “distant reading,” that has come to define—well beyond the wildest expectations of its author—a growing field of unorthodox literary studies.}, language = {en}, publisher = {Verso Books}, author = {Moretti, Franco}, month = jun, year = {2013}, keywords = {Literary Collections / Essays, Literary Criticism / Comparative Literature, Literary Criticism / Semiotics \& Theory, reference}, }
@article{mikolov_efficient_2013, title = {Efficient estimation of word representations in vector space}, shorttitle = {{Word2Vec}}, url = {https://arxiv.org/pdf/1301.3781.pdf}, journal = {arXiv preprint arXiv:1301.3781}, author = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey}, year = {2013}, keywords = {reference}, pages = {1--12}, }
@book{schmidt_pattern-oriented_2013, title = {Pattern-oriented software architecture, patterns for concurrent and networked objects}, volume = {2}, isbn = {1-118-72517-4}, url = {https://www.researchgate.net/profile/Michael-Stal/publication/215835789_Pattern-Oriented_Software_Architecture_Patterns_for_Concurrent_and_Networked_Objects_Volume_2/links/53fd98440cf2dca8000356c5/Pattern-Oriented-Software-Architecture-Patterns-for-Concurrent-and-Networked-Objects-Volume-2.pdf}, publisher = {John Wiley \& Sons}, author = {Schmidt, Douglas C and Stal, Michael and Rohnert, Hans and Buschmann, Frank}, year = {2013}, }
@article{brown_software_2013, title = {Software architecture for developers}, url = {http://static.codingthearchitecture.com/sddconf2014-software-architecture-for-developers-extract.pdf}, journal = {Coding the Architecture}, author = {Brown, Simon}, year = {2013}, }
@article{andrews_beyond_2013, title = {Beyond the tree of texts: {Building} an empirical model of scribal variation through graph analysis of texts and stemmata}, volume = {28}, issn = {0268-1145}, shorttitle = {Beyond the tree of texts}, url = {https://doi.org/10.1093/llc/fqt032}, doi = {10.1093/llc/fqt032}, abstract = {Stemmatology, or the reconstruction of the transmission history of texts, is a field that stands particularly to gain from digital methods. Many scholars already take stemmatic approaches that rely heavily on computational analysis of the collated text (e.g. Robinson and O’Hara 1996; Salemans 2000; Heikkilä 2005; Windram et al. 2008 among many others). Although there is great value in computationally assisted stemmatology, providing as it does a reproducible result and allowing access to the relevant methodological process in related fields such as evolutionary biology, computational stemmatics is not without its critics. The current state-of-the-art effectively forces scholars to choose between a preconceived judgment of the significance of textual differences (the Lachmannian or neo-Lachmannian approach, and the weighted phylogenetic approach) or to make no judgment at all (the unweighted phylogenetic approach). Some basis for judgment of the significance of variation is sorely needed for medieval text criticism in particular. By this, we mean that there is a need for a statistical empirical profile of the text-genealogical significance of the different sorts of variation in different sorts of medieval texts. The rules that apply to copies of Greek and Latin classics may not apply to copies of medieval Dutch story collections; the practices of copying authoritative texts such as the Bible will most likely have been different from the practices of copying the Lives of local saints and other commonly adapted texts. It is nevertheless imperative that we have a consistent, flexible, and analytically tractable model for capturing these phenomena of transmission. In this article, we present a computational model that captures most of the phenomena of text variation, and a method for analysis of one or more stemma hypotheses against the variation model. We apply this method to three ‘artificial traditions’ (i.e. texts copied under laboratory conditions by scholars to study the properties of text variation) and four genuine medieval traditions whose transmission history is known or deduced in varying degrees. Although our findings are necessarily limited by the small number of texts at our disposal, we demonstrate here some of the wide variety of calculations that can be made using our model. Certain of our results call sharply into question the utility of excluding ‘trivial’ variation such as orthographic and spelling changes from stemmatic analysis.}, number = {4}, urldate = {2023-08-26}, journal = {Literary and Linguistic Computing}, author = {Andrews, Tara L. and Macé, Caroline}, month = dec, year = {2013}, pages = {504--521}, }
@book{mcgillivray_methods_2013, title = {Methods in {Latin} {Computational} {Linguistics}}, isbn = {978-90-04-26012-2}, abstract = {In Methods in Latin Computational Linguistics, Barbara McGillivray presents some of the most significant methodological foundations of the emerging field of Latin Computational Linguistics. The reader will find an overview of the computational resources and tools available for Latin and three corpus case studies covering morpho-syntactic and lexical-semantic aspects of Latin verb valency, as well as quantitative diachronic explorations of the argument realization of Latin prefixed verbs. The computational models and the multivariate data analysis techniques employed are explained with a detailed but accessible language. Barbara McGillivray convincingly shows the challenges and opportunities of combining computational methods and historical language data, and contributes to driving the technological change that is affecting Historical Linguistics and the Humanities.}, language = {en}, publisher = {BRILL}, author = {McGillivray, Barbara}, month = nov, year = {2013}, keywords = {Computers / Artificial Intelligence / Natural Language Processing, Language Arts \& Disciplines / Linguistics / Etymology, Language Arts \& Disciplines / Linguistics / General, Language Arts \& Disciplines / Linguistics / Historical \& Comparative}, }
@article{coffee_tesserae_2013, title = {The {Tesserae} {Project}: intertextual analysis of {Latin} poetry}, volume = {28}, issn = {0268-1145, 1477-4615}, shorttitle = {The {Tesserae} {Project}}, url = {https://academic.oup.com/dsh/article-lookup/doi/10.1093/llc/fqs033}, doi = {10.1093/llc/fqs033}, abstract = {Tesserae is a web-based tool for automatically detecting allusions in Latin poetry. Although still in the start-up phase, it already is capable of identifying significant numbers of known allusions, as well as similar numbers of allusions previously unnoticed by scholars. In this article, we use the tool to examine allusions to Vergil’s Aeneid in the first book of Lucan’s Civil War. Approximately 3,000 linguistic parallels returned by the program were compared with a list of known allusions drawn from commentaries. Each was examined individually and graded for its literary significance, in order to benchmark the program’s performance. All allusions from the program and commentaries were then pooled in order to examine broad patterns in Lucan’s allusive techniques which were largely unapproachable without digital methods. Although Lucan draws relatively constantly from Vergil’s generic language in order to maintain the epic idiom, this baseline is punctuated by clusters of pointed allusions, in which Lucan frequently subverts Vergil’s original meaning. These clusters not only attend the most significant characters and events but also play a role in structuring scene transitions. Work is under way to incorporate the ability to match on word meaning, phrase context, as well as metrical and phonological features into future versions of the program.}, language = {en}, number = {2}, urldate = {2023-08-26}, journal = {Literary and Linguistic Computing}, author = {Coffee, N. and Koenig, J.-P. and Poornima, S. and Forstall, C. W. and Ossewaarde, R. and Jacobson, S. L.}, month = jun, year = {2013}, pages = {221--228}, }
@inproceedings{koppel_automatically_2013, address = {Seattle, Washington, USA}, title = {Automatically {Identifying} {Pseudepigraphic} {Texts}}, url = {https://aclanthology.org/D13-1151}, urldate = {2023-07-23}, booktitle = {Proceedings of the 2013 {Conference} on {Empirical} {Methods} in {Natural} {Language} {Processing}}, publisher = {Association for Computational Linguistics}, author = {Koppel, Moshe and Seidman, Shachar}, month = oct, year = {2013}, pages = {1449--1454}, }
@article{martinez_partspeech_2012, title = {Part‐of‐speech tagging}, volume = {4}, issn = {1939-5108}, url = {https://wires.onlinelibrary.wiley.com/doi/pdfdirect/10.1002/wics.195}, number = {1}, journal = {Wiley Interdisciplinary Reviews: Computational Statistics}, author = {Martinez, Angel R}, year = {2012}, note = {Publisher: Wiley Online Library}, pages = {107--113}, }
@article{crane_introduction_2012, title = {Introduction to the special issue on corpus and computational linguistics, philology, and the linguistic heritage of humanity}, volume = {5}, issn = {1556-4673}, url = {https://doi.org/10.1145/2160165.2160166}, doi = {10.1145/2160165.2160166}, abstract = {The articles in this issue make two complementary assertions: first, language and linguistic sources are a key element of human cultural heritage and, second, we need to integrate the ancient goals of philology with rapidly emerging methods from fields such as Corpus and Computational Linguistics. The first 15,000,000 volumes digitized by Google contained data from more than 400 languages covering more than four thousand years of the human record. We need to develop methods to explore linguistic changes and the ideas that languages encode as these evolve and circulate over millennia and on a global scale.}, number = {1}, urldate = {2023-08-26}, journal = {Journal on Computing and Cultural Heritage}, author = {Crane, Gregory and Lüdeling, Anke}, month = apr, year = {2012}, keywords = {Computational linguistics, corpus linguistics, philology}, pages = {1:1--1:5}, }
@inproceedings{buchler_increasing_2012, address = {Berlin, Heidelberg}, series = {Lecture {Notes} in {Computer} {Science}}, title = {Increasing {Recall} for {Text} {Re}-use in {Historical} {Documents} to {Support} {Research} in the {Humanities}}, isbn = {978-3-642-33290-6}, doi = {10.1007/978-3-642-33290-6_11}, abstract = {High precision text re-use detection allows humanists to discover where and how particular authors are quoted (e.g., the different sections of Plato’s work that come in and out of vogue). This paper reports on on-going work to provide the high recall text re-use detection that humanists often demand. Using an edition of one Greek work that marked quotations and paraphrases from the Homeric epics as our testbed, we were able to achieve a recall of at least 94\% while maintaining a precision of 73\%. This particular study is part of a larger effort to detect text re-use across 15 million words of Greek and 10 million words of Latin available or under development as openly licensed TEI XML.}, language = {en}, booktitle = {Theory and {Practice} of {Digital} {Libraries}}, publisher = {Springer}, author = {Büchler, Marco and Crane, Gregory and Moritz, Maria and Babeu, Alison}, editor = {Zaphiris, Panayiotis and Buchanan, George and Rasmussen, Edie and Loizides, Fernando}, year = {2012}, keywords = {Athenaeus, Homer, historical text re-use, hypertextuality}, pages = {95--100}, }
@article{mimno_computational_2012, title = {Computational historiography: {Data} mining in a century of classics journals}, volume = {5}, issn = {1556-4673, 1556-4711}, shorttitle = {Computational historiography}, url = {https://dl.acm.org/doi/10.1145/2160165.2160168}, doi = {10.1145/2160165.2160168}, abstract = {More than a century of modern Classical scholarship has created a vast archive of journal publications that is now becoming available online. Most of this work currently receives little, if any, attention. The collection is too large to be read by any single person and mostly not of sufficient interest to warrant traditional close reading. This article presents computational methods for identifying patterns and testing hypotheses about Classics as a field. Such tools can help organize large collections, introduce younger scholars to the history of the field, and act as a “survey,” identifying anomalies that can be explored using more traditional methods.}, language = {en}, number = {1}, urldate = {2023-08-26}, journal = {Journal on Computing and Cultural Heritage}, author = {Mimno, David}, month = apr, year = {2012}, pages = {1--19}, }
@article{korkiakangas_challenges_2011, title = {Challenges in {Annotating} {Medieval} {Latin} {Charters}}, volume = {26}, number = {2}, journal = {Journal for Language Technology and Computational Linguistics (JLCL)}, author = {Korkiakangas, Timo}, year = {2011}, pages = {105--116}, }
@inproceedings{bamman_ancient_2011, address = {Berlin, Heidelberg}, series = {Theory and {Applications} of {Natural} {Language} {Processing}}, title = {The {Ancient} {Greek} and {Latin} {Dependency} {Treebanks}}, isbn = {978-3-642-20227-8}, doi = {10.1007/978-3-642-20227-8_5}, abstract = {This paper describes the development, composition, and several uses of the Ancient Greek and Latin Dependency Treebanks, large collections of Classical texts in which the syntactic, morphological and lexical information for each word is made explicit. To date, over 200 individuals from around the world have collaborated to annotate over 350,000 words, including the entirety of Homer’s Iliad and Odyssey, Sophocles’ Ajax, all of the extant works of Hesiod and Aeschylus, and selections from Caesar, Cicero, Jerome, Ovid, Petronius, Propertius, Sallust and Vergil. While perhaps the most straightforward value of such an annotated corpus for Classical philology is the morphosyntactic searching it makes possible, it also enables a large number of downstream tasks as well, such as inducing the syntactic behavior of lexemes and automatically identifying similar passages between texts.}, language = {en}, booktitle = {Language {Technology} for {Cultural} {Heritage}}, publisher = {Springer}, author = {Bamman, David and Crane, Gregory}, editor = {Sporleder, Caroline and van den Bosch, Antal and Zervanou, Kalliopi}, year = {2011}, keywords = {Ancient Greek, Latin, dependency grammar, digital libraries, treebanks}, pages = {79--98}, }
@inproceedings{rehurek_software_2010, address = {Valletta, Malta}, title = {Software {Framework} for {Topic} {Modelling} with {Large} {Corpora}}, shorttitle = {Gensim}, url = {http://is.muni.cz/publication/884893/en}, urldate = {2024-02-27}, booktitle = {Proceedings of the {LREC} 2010 {Workshop} {New} {Challenges} for {NLP} {Frameworks}}, publisher = {University of Malta}, author = {Řehůřek, Radim and Sojka, Petr}, month = may, year = {2010}, pages = {45--50}, }
@article{heyer_challenges_2010, title = {Some {Challenges} {Posed} to {Computer} {Science} by the {eHumanities}}, url = {https://dl.gi.de/server/api/core/bitstreams/d87b4505-32b1-4f27-97cd-3f20aed8d4be/content}, language = {en}, author = {Heyer, Gerhard and Büchler, Marco}, year = {2010}, pages = {524--529}, }
@article{neron_proper_2010, title = {Proper {Generalized} {Decomposition} for {Multiscale} and {Multiphysics} {Problems}}, volume = {17}, issn = {1886-1784}, url = {https://doi.org/10.1007/s11831-010-9053-2}, doi = {10.1007/s11831-010-9053-2}, abstract = {This paper is a review of the developments of the Proper Generalized Decomposition (PGD) method for the resolution, using the multiscale/multiphysics LATIN method, of the nonlinear, time-dependent problems ((visco)plasticity, damage, …) encountered in computational mechanics. PGD leads to considerable savings in terms of computing time and storage, and makes engineering problems which would otherwise be completely out of range of industrial codes accessible.}, language = {en}, number = {4}, urldate = {2023-08-26}, journal = {Archives of Computational Methods in Engineering}, author = {Néron, David and Ladevèze, Pierre}, month = dec, year = {2010}, keywords = {Discontinuous Galerkin Scheme, Domain Decomposition Method, Model Reduction Technique, Proper Generalized Decomposition, Reference Problem}, pages = {351--372}, }
@article{ladeveze_latin_2010, title = {The {LATIN} multiscale computational method and the {Proper} {Generalized} {Decomposition}}, volume = {199}, issn = {0045-7825}, url = {https://linkinghub.elsevier.com/retrieve/pii/S0045782509002643}, doi = {10.1016/j.cma.2009.06.023}, abstract = {This paper deals with the synergy between the LATIN multiscale method and what is called the Proper Generalized Decomposition (PGD) which is the key of its performances.}, language = {en}, number = {21--22}, urldate = {2023-08-26}, journal = {Computer Methods in Applied Mechanics and Engineering}, author = {Ladevèze, P. and Passieux, J.-C. and Néron, D.}, month = apr, year = {2010}, pages = {1287--1296}, }
@article{bamman_computational_2009, title = {Computational {Linguistics} and {Classical} {Lexicography}}, volume = {3}, url = {https://www.proquest.com/docview/2555208382?pq-origsite=gscholar&fromopenview=true}, abstract = {Manual lexicography has produced extraordinary results for Greek and Latin, but it cannot in the immediate future provide for all texts the same level of coverage available for the most heavily studied materials. As we build a cyberinfrastructure for Classics in the future, we must explore the role that automatic methods can play within it. Using technologies inherited from the disciplines of computational linguistics and computer science, we can create a complement to these traditional reference works - a dynamic lexicon that presents statistical information about a word’s usage in context, including information about its sense distribution within various authors, genres and eras, and syntactic information as well.}, language = {en}, number = {1}, urldate = {2023-08-26}, journal = {Digital Humanities Quarterly}, author = {Bamman, David and Crane, Gregory}, year = {2009}, }
@article{bamman_case_2008, title = {A {Case} {Study} in {Treebank} {Collaboration} and {Comparison}: {Accusativus} {Cum} {Infinitivo} and {Subordination} in {Latin}}, volume = {90}, journal = {The Prague Bulletin of Mathematical Linguistics}, author = {Bamman, David and Passarotti, Marco and Crane, Gregory R.}, year = {2008}, pages = {109--122}, }
@article{binongo_application_1999, title = {The application of principal component analysis to stylometry}, volume = {14}, issn = {0268-1145}, url = {https://doi.org/10.1093/llc/14.4.445}, doi = {10.1093/llc/14.4.445}, abstract = {In recent years principal component analysis has become popular for investigations in computational stylistics, particularly for studies of authorship. The mathematical nature of the theory that underpins the method makes it rather inaccessible to linguists and literary scholars. Consequently, confidence in its correct application is diminished. By first restricting the procedure to the use of two marker words, a pictorial description of its operation is derived. Some characteristics of the method are then examined. Finally, in the context of a Shakespearean example the technique is extended to p words, and suggestions are advanced to alleviate possible shortcomings.}, number = {4}, urldate = {2023-07-23}, journal = {Literary and Linguistic Computing}, author = {Binongo, JNG and Smith, MWA}, month = dec, year = {1999}, pages = {445--466}, }
@inproceedings{gulordava_diachronic_nodate, address = {Uppsala, Sweden}, title = {Diachronic {Trends} in {Word} {Order} {Freedom} and {Dependency} {Length} in {Dependency}-{Annotated} {Corpora} of {Latin} and {Ancient} {Greek}}, url = {https://aclanthology.org/W15-2115.pdf}, booktitle = {Proceedings of the {Third} {International} {Conference} on {Dependency} {Linguistics} ({Depling} 2015)}, author = {Gulordava, Kristina and Merlo, Paola}, year = {2015}, }
@book{noauthor_diachronic_nodate, address = {Amsterdam; Philadelphia}, series = {Benjamins {Current} {Topics}}, title = {Diachronic {Treebanks} for {Historical} {Linguistics}}, doi = {10.1075/bct.113}, number = {113}, publisher = {Benjamins}, }
@article{kabala_computational_nodate, title = {Computational {Authorship} {Attribution} in {Medieval} {Latin} {Corpora}: {The} {Case} of the {Monk} of {Lido} (ca. 1101–08) and {Gallus} {Anonymous} (ca. 1113–17)}, volume = {54}, doi = {10.1007/s10579-018-9424-0}, number = {1}, journal = {Language Resources and Evaluation}, author = {Kabala, Jakub}, year = {2020}, pages = {25--56}, }
@inproceedings{martinez_garcia_latin-spanish_nodate, title = {Latin-{Spanish} {Neural} {Machine} {Translation}: {From} the {Bible} to {Saint} {Augustine}}, booktitle = {Proceedings of the {LREC} 2020 1st {Workshop} on {Language} {Technologies} for {Historical} and {Ancient} {Languages} ({LT4HALA} 2020)}, author = {Martínez Garcia, Eva and García-Tejedor, Álvaro J.}, year = {2020}, pages = {94--99}, }
@article{janicke_visualizations_nodate, title = {Visualizations for {Text} {Re}-use}, url = {https://www.informatik.uni-leipzig.de/~stjaenicke/Visualizations_for_Text-Reuse.pdf}, language = {en}, author = {Jänicke, Stefan and Geßner, Annette and Büchler, Marco and Scheuermann, Gerik}, }
@article{silvia_visualizing_nodate, title = {Visualizing {Variation} in {Classical} {Text} with {Force} {Directed} {Storylines}}, abstract = {The study of literature is changing dramatically by incorporating new opportunities that digital technology presents. Data visualization overturns the dynamic for literary analysis by revealing and displaying connections and patterns between elements in text. Literary scholars compare and analyze textual variations in different versions of a lost original text and work to reconstruct the original text in the form of a critical edition. A critical edition notes textual variations in extensive footnotes, collectively called a critical apparatus. Information in the apparatus is of great interest to scholars who seek to explore complex relationships between text versions. Motivated by application to classical Latin texts, we adapted the storyline technique to visualize a critical apparatus. The visualization facilitates guided discovery of similarities and dissimilarities between prior text versions, which are difficult to detect and reason about with traditional deep reading and spreadsheet-based methods. Storyline visualizations help users understand and analyze the interactions between entities in a story and explore how entity relationships evolve over time. Typical design considerations in existing storyline techniques include minimizing line crossing and line wiggling, which are computationally intense problems. Generating storyline layouts in real time is a substantial challenge to interactive visualization. Existing storyline techniques support limited user interaction due to the high cost of layout. We contribute an initial force directed layout algorithm that dynamically reflows storyline layouts with best effort response to internal and coordinated interactions. We anticipate that the characteristics of our layout algorithm will allow for graceful response to a wide variety of interaction types, speeds, and patterns. We conducted a user study to evaluate the legibility of our storyline layout after convergence. The evaluation results demonstrate that most users can accurately complete a wide variety of visual metaphor interpretation, reading, and pattern recognition tasks within 20 seconds.}, language = {en}, author = {Silvia, Shejuti and Etemadpour, Ronak and Abbas, June and Huskey, Sam and Weaver, Chris}, }