<script src="https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Fzotero-group%2Fschulzkx%2F5158478&jsonp=1"></script>
<?php
// Server-side BibBase embed: fetch the pre-rendered publication list for the
// Zotero group library and inline its HTML into this page.
// NOTE: declare(strict_types=1) is intentionally omitted — this file emits
// HTML before the PHP block, so a declare statement would be a parse error.
$bibbaseUrl = "https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Fzotero-group%2Fschulzkx%2F5158478";

// file_get_contents() returns false on failure (DNS error, timeout, HTTP
// error with default context). The original print_r(false) silently printed
// nothing; make the failure visible without breaking the surrounding page.
$contents = file_get_contents($bibbaseUrl);
if ($contents === false) {
    // An HTML comment keeps the page valid while signalling the outage.
    echo "<!-- BibBase content could not be loaded -->";
} else {
    // $contents is a plain string here, so echo is the idiomatic output call
    // (print_r is a debugging helper for arrays/objects; for strings its
    // output is identical, so the success path is unchanged).
    echo $contents;
}
?>
<iframe src="https://bibbase.org/show?bib=https%3A%2F%2Fbibbase.org%2Fzotero-group%2Fschulzkx%2F5158478"></iframe>
For more details see the documentation.
To the site owner:
Action required! Mendeley is changing its API. In order to keep using Mendeley with BibBase past April 14th, you need to:
@inproceedings{beyer_textanalyse_2025, address = {Baden-Baden}, series = {Paradeigmata}, title = {Textanalyse mit {KI}: {Wie} kommt sie in die {Klassische} {Philologie}?}, booktitle = {Digitalität im {Unterricht} der {Alten} {Sprachen}}, publisher = {Nomos-Verlag}, author = {Beyer, Andrea}, year = {2025}, }
@misc{kotschka_neu_2025, address = {Berlin}, title = {Neu ist immer besser? - {Texteditionen} und digitale {Methoden}}, url = {https://zenodo.org/records/14771606}, author = {Kotschka, Florian and Beyer, Andrea}, month = jan, year = {2025}, }
@misc{beyer_llm_2025-1, address = {Würzburg}, title = {{LLM} \& {NLP} in der {Klassischen} {Philologie}}, url = {https://zenodo.org/records/14679224}, author = {Beyer, Andrea and Schulz, Konstantin}, month = jan, year = {2025}, }
@misc{schulz_responsible_2025, title = {Responsible {AI} \& {Dokumentation}}, url = {https://zenodo.org/records/14716501}, abstract = {Diese Präsentation widmet sich der verantwortungsvollen Entwicklung und Dokumentation von KI-Systemen. Sie bietet einen Überblick über zentrale ethische Herausforderungen und deren Bedeutung in der Praxis. Unter Berücksichtigung aktueller Forschungsarbeiten werden Schlüsselthemen wie Werteorientierung bei Datensätzen, Bias-Erkennung sowie die Integration ethischer Normen im Entwicklungsprozess beleuchtet. Ergänzt wird dies durch Ansätze wie das "Red Teaming" von Sprachmodellen, um potenzielle Schwachstellen offenzulegen. Ein weiterer Fokus liegt auf den Anforderungen an die Dokumentation von KI-Systemen gemäß EU AI Act, einschließlich Aspekten wie Transparenz, menschlicher Aufsicht, Cybersicherheit und Risikoanalyse. Die Präsentation veranschaulicht auch die Initiativen der Humboldt-Universität zu Berlin, darunter Leitlinien zur Nutzung generativer KI in Forschung und Lehre sowie deren Einbettung in eine breite Governance-Strategie. Abschließend werden praktische Instrumente wie Model Cards und Data Statements vorgestellt, die die strukturierte Dokumentation von KI-Systemen fördern. Diese Ansätze bieten eine Grundlage für ethisch und technisch fundierte KI-Lösungen und unterstützen die Schaffung eines öffentlichen Transparenzstandards, wie er in Städten wie Helsinki und Amsterdam umgesetzt wird.}, language = {deu}, urldate = {2025-01-22}, author = {Schulz, Konstantin}, month = jan, year = {2025}, doi = {10.5281/zenodo.14716501}, keywords = {Artificial Intelligence, Artificial intelligence, Natural Language Processing, Natural language processing}, }
@misc{beyer_llm_2025, title = {{LLM} \& {NLP} in der {Klassischen} {Philologie}: {Digitale} {Literaturwissenschaft} für alle?}, shorttitle = {{LLM} \& {NLP} in der {Klassischen} {Philologie}}, url = {https://zenodo.org/records/14679224}, abstract = {Die Präsentation "LLM \& NLP in der Klassischen Philologie: Digitale Literaturwissenschaft für alle?" beleuchtet den Einsatz von Natural Language Processing (NLP) und Large Language Models (LLMs) in der Analyse antiker Texte, mit besonderem Fokus auf Sallusts Monographien Bellum Catilinae und Bellum Iugurthinum. Im Zentrum steht die Frage nach den emotionalen Nuancen der Protagonisten Catilina und Iugurtha, die durch Sentiment-Analysen untersucht werden. Durch die Kombination moderner NLP-Methoden und spezialisierter LLMs wie GPT-4 und Gemini-1.5-Pro wird eine Polaritätsbewertung der Texte vorgenommen. Neben der Vorstellung lexikonbasierter Ansätze und Limitierungen wird die Entwicklung spezifischer Prompting-Strategien für LLMs diskutiert. Methodische Herausforderungen wie geringe Sentiment-Quote und Unterschiede in der Modellarchitektur werden thematisiert, ebenso wie Chancen durch Multimodalität und datenbasierte Erweiterungen. Die Ergebnisse liefern Einblicke in die emotionale Charakterisierung und eröffnen neue Perspektiven für die digitale Literaturwissenschaft. Abschließend wird die Zukunft digitaler Methoden in den Geisteswissenschaften erörtert, einschließlich der Bedeutung interdisziplinärer Zusammenarbeit und der Nutzung von Retrieval-Augmented Generation (RAG) sowie Low-Rank Adaptation (LoRA) für historische Sprachen.}, language = {deu}, urldate = {2025-01-22}, author = {Beyer, Andrea and Schulz, Konstantin}, month = jan, year = {2025}, doi = {10.5281/zenodo.14679224}, keywords = {Artificial Intelligence, Artificial intelligence, Classics, Languages and literature, Latin, Literature, Literature studies, Literature study, Natural Language Processing, Natural language processing}, }
@inproceedings{beyer_daidalos_2024, address = {Passau}, title = {Daidalos: {Wie} viel {Methodenkompetenz} braucht ein {User}?}, url = {https://zenodo.org/records/10698299}, doi = {10.5281/zenodo.10698299}, booktitle = {Book of {Abstracts} - {DHd2024}}, author = {Beyer, Andrea and Schulz, Konstantin}, month = feb, year = {2024}, pages = {336--338}, }
@misc{sankarapu_dlbacktrace_2024, title = {{DLBacktrace}: {A} {Model} {Agnostic} {Explainability} for any {Deep} {Learning} {Models}}, shorttitle = {{DLBacktrace}}, url = {http://arxiv.org/abs/2411.12643}, doi = {10.48550/arXiv.2411.12643}, abstract = {The rapid advancement of artificial intelligence has led to increasingly sophisticated deep learning models, which frequently operate as opaque 'black boxes' with limited transparency in their decision-making processes. This lack of interpretability presents considerable challenges, especially in high-stakes applications where understanding the rationale behind a model's outputs is as essential as the outputs themselves. This study addresses the pressing need for interpretability in AI systems, emphasizing its role in fostering trust, ensuring accountability, and promoting responsible deployment in mission-critical fields. To address the interpretability challenge in deep learning, we introduce DLBacktrace, an innovative technique developed by the AryaXAI team to illuminate model decisions across a wide array of domains, including simple Multi Layer Perceptron (MLPs), Convolutional Neural Networks (CNNs), Large Language Models (LLMs), Computer Vision Models, and more. We provide a comprehensive overview of the DLBacktrace algorithm and present benchmarking results, comparing its performance against established interpretability methods, such as SHAP, LIME, GradCAM, Integrated Gradients, SmoothGrad, and Attention Rollout, using diverse task-based metrics. The proposed DLBacktrace technique is compatible with various model architectures built in PyTorch and TensorFlow, supporting models like Llama 3.2, other NLP architectures such as BERT and LSTMs, computer vision models like ResNet and U-Net, as well as custom deep neural network (DNN) models for tabular data. This flexibility underscores DLBacktrace's adaptability and effectiveness in enhancing model transparency across a broad spectrum of applications. 
The library is open-sourced and available at https://github.com/AryaXAI/DLBacktrace .}, urldate = {2025-01-26}, publisher = {arXiv}, author = {Sankarapu, Vinay Kumar and Chitroda, Chintan and Rathore, Yashwardhan and Singh, Neeraj Kumar and Seth, Pratinav}, month = nov, year = {2024}, note = {arXiv:2411.12643 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Machine Learning}, }
@article{keersmaekers_adapting_2024, title = {Adapting transformer models to morphological tagging of two highly inflectional languages: a case study on {Ancient} {Greek} and {Latin}}, shorttitle = {Adapting transformer models to morphological tagging of two highly inflectional languages}, url = {https://aclanthology.org/2024.ml4al-1.17}, doi = {10.18653/v1/2024.ml4al-1.17}, abstract = {Natural language processing for Greek and Latin, inflectional languages with small corpora, requires special techniques. For morphological tagging, transformer models show promising potential, but the best approach to use these models is unclear. For both languages, this paper examines the impact of using morphological lexica, training different model types (a single model with a combined feature tag, multiple models for separate features, and a multi-task model for all features), and adding linguistic constraints. We find that, although simply fine-tuning transformers to predict a monolithic tag may already yield decent results, each of these adaptations can further improve tagging accuracy.}, language = {en}, urldate = {2025-01-26}, journal = {Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024)}, author = {Keersmaekers, Alek and Mercelis, Wouter}, year = {2024}, note = {Conference Name: Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024) Place: Hybrid in Bangkok, Thailand and online Publisher: Association for Computational Linguistics}, pages = {165--176}, }
@article{soffiantini_cross-linguistic_2024, title = {Cross-linguistic annotation transfer in geoparsing experiments with {Classical} texts}, volume = {6}, url = {https://journal.dhbenelux.org/wp-content/uploads/2024/11/9_Soffiantini_individual.pdf}, abstract = {The Natural History is an encyclopedic work written by the Latin author Pliny the Elder (first century CE). In this extensive text in 37 books, geography plays a pivotal role, with hundreds of mentions of ancient place names. In this paper, a geoparsing experiment is conducted on the Natural History with the scope of automatically identifying and extracting place entities. To achieve this, we take advantage of state-of-the-art NLP models to develop a multistage pipeline involving English Named Entity Recognition, English-Latin sentence alignment, and entity projection. The paper demonstrates how cross-lingual annotation transfer can be applied from a translation in a modern language back to the original text in the context of low-/medium-resource languages, such as Latin. The efficacy of the proposed pipeline is evaluated through the use of both standard metrics and a comprehensive manual error analysis. Additionally, the results are compared to those obtained by other Latin NER tools. Both analyses demonstrate that the proposed methodology achieves a superior f1-score. Finally, the majority of place entities were automatically associated with unique identifiers that enable geolocation by the projection of pre-disambiguated annotations derived from another geo-spatial project.}, urldate = {2025-01-26}, journal = {DH Benelux Journal}, author = {Soffiantini, Laura}, year = {2024}, pages = {155--168}, }
@article{beersmans_gotta_2024, title = {“{Gotta} catch ‘em all!”: {Retrieving} people in {Ancient} {Greek} texts combining transformer models and domain knowledge}, shorttitle = {“{Gotta} catch ‘em all!”}, url = {https://aclanthology.org/2024.ml4al-1.16.pdf}, doi = {10.18653/v1/2024.ml4al-1.16}, abstract = {In this paper, we present a study of transformer-based Named Entity Recognition (NER) as applied to Ancient Greek texts, with an emphasis on retrieving personal names. Recent research shows that, while the task remains difficult, the use of transformer models results in significant improvements. We, therefore, compare the performance of four transformer models on the task of NER for the categories of people, locations and groups, and add an out-of-domain test set to the existing datasets. Results on this set highlight the shortcomings of the models when confronted with a random sample of sentences. To be able to more straightforwardly integrate domain and linguistic knowledge to improve performance, we narrow down our approach to the category of people. The task is simplified to a binary PERS/MISC classification on the token level, starting from capitalised words. Next, we test the use of domain and linguistic knowledge to improve the results. We find that including simple gazetteer information as a binary mask has a marginally positive effect on newly annotated data and that treebanks can be used to help identify multi-word individuals if they are scarcely or inconsistently annotated in the available training data. 
The qualitative error analysis identifies the potential for improvement in both manual annotation and the inclusion of domain and linguistic knowledge in the transformer models.}, language = {en}, urldate = {2025-01-26}, journal = {Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024)}, author = {Beersmans, Marijke and Keersmaekers, Alek and De Graaf, Evelien and Van De Cruys, Tim and Depauw, Mark and Fantoli, Margherita}, year = {2024}, note = {Conference Name: Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024) Place: Hybrid in Bangkok, Thailand and online Publisher: Association for Computational Linguistics}, pages = {152--164}, }
@inproceedings{palladino_development_2024, title = {Development of {Robust} {NER} {Models} and {Named} {Entity} {Tagsets} for {Ancient} {Greek}}, url = {https://aclanthology.org/2024.lt4hala-1.11.pdf}, abstract = {This contribution presents a novel approach to the development and evaluation of transformer-based models for Named Entity Recognition and Classification in Ancient Greek texts. We trained two models with annotated datasets by consolidating potentially ambiguous entity types under a harmonized set of classes. Then, we tested their performance with out-of-domain texts, reproducing a real-world use case. Both models performed very well under these conditions, with the multilingual model being slightly superior on the monolingual one. In the conclusion, we emphasize current limitations due to the scarcity of high-quality annotated corpora and to the lack of cohesive annotation strategies for ancient languages.}, urldate = {2025-01-26}, author = {Palladino, Chiara and Yousef, Tariq}, year = {2024}, }
@inproceedings{sullutrone_large_2024, title = {Large {Language} {Models} integration in {Digital} {Humanities}}, url = {https://www.semanticscholar.org/paper/Large-Language-Models-integration-in-Digital-Sullutrone/8d127bc2f01fab76ea9fdad8706d7f58fd4b8308}, abstract = {The exponential growth of available data to Digital Humanities (DH) has created an impending need for tools capable of analyzing and extracting information from multi-lingual historical documents. This paper explores the research directions of my PhD project: providing DH scholars with effective, efficient, and explainable tools based on recent advancements in Large Language Models (LLMs). Two are the main directions of investigation: Self-Improving LLMs applied to Text-to-SQL and Topic Modeling, with a focus on interacting with and augmenting existing DBMS; Knowledge Graph (KG) creation and integration to mitigate hallucination, improve transparency and reasoning in question-answering systems. At the heart of my research lies the Digital Maktaba (DM) project which seeks to create a digital library for assisting in the preservation and analysis of multicultural non-latin heritage documents using, among others, cutting edge techniques for Natural Language Processing (NLP) and Data Science. The DM objectives and ideals align with the ultimate goal of the PhD project: the creation of instruments capable of aiding human-data interaction and information extraction while keeping the user at the center of an ever-evolving system. These tools have the potential to revolutionize the way DH scholars interact with historical documents, leading to new insights and discoveries for the field at large.}, urldate = {2025-01-15}, author = {Sullutrone, Giovanni}, year = {2024}, }
@article{beyer_rezension_2024, series = {Latein und {Griechisch} in {Berlin} und {Brandenburg}}, title = {Rezension zu „{KI}-{Bildung} im {Lateinunterricht} – {Ein} schulpraktischer {Leitfaden}“ der {Reihe} {KI}-{Bildung} im {Ovid} {Verlag}, {Hrsg}. {Rudolf} {Henneböhl}}, volume = {68}, issn = {0945-2257}, url = {https://davbb.de/sammlung-ausgaben-lgbb/lgbb-2-2024/}, number = {2}, journal = {LGBB}, author = {Beyer, Andrea}, year = {2024}, pages = {148--152}, }
@article{beyer_ki_2024, title = {{KI} im altsprachlichen {Unterricht}}, volume = {5}, url = {https://www.biejournals.de/index.php/lgnrw/article/view/7309/6512}, doi = {https://doi.org/10.11576/lgnrw-7309}, number = {1}, journal = {LGNRW}, author = {Beyer, Andrea}, year = {2024}, pages = {9--15}, }
@inproceedings{schulz_seflag_2024, address = {Miami, USA}, title = {{SEFLAG}: {Systematic} {Evaluation} {Framework} for {NLP} {Models} and {Datasets} in {Latin} and {Ancient} {Greek}}, shorttitle = {{SEFLAG}}, url = {https://aclanthology.org/2024.nlp4dh-1.24}, abstract = {Literary scholars of Latin and Ancient Greek increasingly use natural language processing for their work, but many models and datasets are hard to use due to a lack of sustainable research data management. This paper introduces the Systematic Evaluation Framework for natural language processing models and datasets in Latin and Ancient Greek (SEFLAG), which consistently assesses language resources using common criteria, such as specific evaluation metrics, metadata and risk analysis. The framework, a work in progress in its initial phase, currently covers lemmatization and named entity recognition for both languages, with plans for adding dependency parsing and other tasks. For increased transparency and sustainability, a thorough documentation is included as well as an integration into the HuggingFace ecosystem. The combination of these efforts is designed to support researchers in their search for suitable models.}, urldate = {2024-11-12}, booktitle = {Proceedings of the 4th {International} {Conference} on {Natural} {Language} {Processing} for {Digital} {Humanities}}, publisher = {Association for Computational Linguistics}, author = {Schulz, Konstantin and Deichsler, Florian}, editor = {Hämäläinen, Mika and Öhman, Emily and Miyagawa, So and Alnajjar, Khalid and Bizzoni, Yuri}, month = nov, year = {2024}, pages = {247--258}, }
@misc{beyer_ki_2024-1, title = {{KI} und Übersetzen – {Literaturunterricht} {Latein}}, url = {https://zenodo.org/records/14063591}, abstract = {In diesem Webinar werden vier Themenkomplexe interaktiv erarbeitet: Was ist Übersetzen? Was ist Maschinelles Übersetzen? KI und Übersetzungen literarischer Texte KI und Übersetzungshilfen}, language = {deu}, urldate = {2024-11-12}, author = {Beyer, Andrea}, month = nov, year = {2024}, doi = {10.5281/zenodo.14063591}, keywords = {AI, Latin, Machine Translation, Translation}, }
@misc{beyer_ki_2024-2, title = {{KI} im {Spracherwerb} der historischen {Sprache} {Latein}}, url = {https://zenodo.org/records/14033264}, abstract = {Der Vortrag befasst sich mit der Rolle und den Einsatzmöglichkeiten von Künstlicher Intelligenz (KI) beim Erwerb der historischen Sprache Latein. Die Präsentation gliedert sich in vier Abschnitte: Grundlagen: Es erfolgt eine Einführung in die Termini KI, Spracherwerb und Latein. Dabei werden die Besonderheiten des Lateinischen als historische Sprache und die spezifischen Herausforderungen beim Spracherwerb dargestellt. KI-Bildung (Bildungswissenschaft): Es werden verschiedene Kompetenzbereiche der KI-Bildung vorgestellt und anhand eines Fallbeispiels für den Lateinunterricht konkretisiert. Spracherwerb (Psycholinguistik): Es werden Möglichkeiten aufgezeigt, wie KI zur Modellierung des lateinischen Spracherwerbs genutzt werden kann, etwa durch den Transfer moderner Erwerbsstufenmodelle oder die Entwicklung von Personas zur Generierung synthetischer Sprachdaten. KI als Tutor (Didaktik): Es erfolgt ein knapper Überblick über den Einsatz von KI-Chatbots als Tutor und Intelligente Tutoring Systeme (ITS). Dabei werden Themen wie individualisierte Lernunterstützung, adaptives Feedback und personalisierte Lernpfade angeschnitten.}, language = {deu}, urldate = {2024-11-04}, author = {Beyer, Andrea}, month = nov, year = {2024}, doi = {10.5281/zenodo.14033264}, keywords = {AI, Historical Language, Language Acquisition, Latin}, }
@misc{beyer_daidalos-projekt_2024, title = {Daidalos-{Projekt} - {Entwicklung} einer {Infrastruktur} zum {Einsatz} von {Natural} {Language} {Processing} für {Forschende} der {Klassischen} {Philologie}}, url = {https://zenodo.org/records/12635794}, abstract = {Project proposal approved by the German Research Council as part of their Funding Programme "e-Research Technologies" : https://www.dfg.de/en/research-funding/funding-opportunities/programmes/infrastructure/lis/funding-opportunities/e-research-technologies}, language = {deu}, urldate = {2024-07-03}, author = {Beyer, Andrea and Schulz, Konstantin}, month = jul, year = {2024}, doi = {10.5281/zenodo.12635794}, note = {Publisher: Zenodo}, keywords = {Artificial intelligence, Classics, Computer and information sciences, Linguistics, Natural language processing}, }
@misc{beyer_digital_2024, title = {Digital {Literacies}}, url = {https://zenodo.org/records/10515036}, abstract = {The slides provide a brief overview of different concepts and frameworks relating to digital literacy, data literacy, and ai literacy.}, language = {deu}, urldate = {2024-10-31}, author = {Beyer, Andrea}, month = jan, year = {2024}, doi = {10.5281/zenodo.10515036}, }
@misc{beyer_ki_2024-3, title = {{KI} und {Lateinunterricht}}, url = {https://zenodo.org/records/10829822}, abstract = {Präsentationen zu einer Fortbildungsveranstaltung für Lehrkräfte in Jena, 16.3.2024 Teil 1: (generative) KI im Lateinunterricht inkl. einiger Aufgabenbeispiele und deren "Lösung" mittels KI Teil 2: Schwerpunktthema: Bewerten unter den Auspizien von KI}, language = {deu}, urldate = {2024-10-31}, author = {Beyer, Andrea}, month = mar, year = {2024}, doi = {10.5281/zenodo.10829822}, keywords = {AI Literacy, KI, KI und Prüfen, KI-Bildung, KI-Ethik, Lateinunterricht}, }
@misc{beyer_nlp-infrastruktur_2024, title = {Eine {NLP}-{Infrastruktur} für {KI}-skeptische {User}}, url = {https://zenodo.org/records/12199989}, abstract = {The talk was given at the Research Lounge hosted by the Vice President for Research of Humboldt-Universität zu Berlin.}, language = {deu}, urldate = {2024-10-31}, author = {Beyer, Andrea and Kotschka, Florian}, month = jun, year = {2024}, doi = {10.5281/zenodo.12199989}, keywords = {Classics, Computer and information sciences, Literary Studies, Literature studies, NLP}, }
@misc{beyer_can_2024, title = {Can {Jupyter} {Help} {Daidalos}? {Or}: {How} to {Develop} {Digital} {Literacies} and {Assess} {Them}?}, shorttitle = {Can {Jupyter} {Help} {Daidalos}?}, url = {https://zenodo.org/records/12200296}, abstract = {The Daidalos project is developing an infrastructure that will enable researchers in Classical Philology and related disciplines to apply various methods of natural language processing to research corpora they have built themselves. While working closely with Classical Philologists to understand their needs in terms of functionality and design of the infrastructure, we have – not unexpectedly – encountered the biggest challenge to the success of our project: How can potential users develop research questions and investigate them using Daidalos if their research expertise does not include the necessary digital skills? Or the other way round: How can we assess the level of competence of our users in order to provide them with useful and possibly personalised support?We therefore decided to address this challenge by firstly developing a domain-specific and case-sensitive competency model of digital literacies (i. e. information, data and AI literacy) and secondly by providing curated learning materials in the form of Jupyter Notebooks, among other things. These Jupyter Notebooks are widely used in teaching and assessment because they are said to provide low-threshold access to programming through their function as interactive worksheets. Although they are unknown in our own community, we wanted to offer them as a way to improve one’s digital literacies. But when implementing the first Jupyter Notebook, we encountered a familiar problem: how granular should the content be prepared and explained? 
Or to put it more generally: Are Jupyter Notebooks really as accessible as they claim to be?After an introduction to the project, the concept of digital literacies used, and an example of a Jupyter Notebook prepared for different proficiency levels, we will discuss how researchers can be motivated to improve their domain-specific digital literacies in order to enhance their overall research and teaching expertise. [Roundtable discussion]}, language = {eng}, urldate = {2024-10-31}, author = {Beyer, Andrea}, month = jun, year = {2024}, doi = {10.5281/zenodo.12200296}, keywords = {Classics, Digital Literacies, Digital humanities, Jupyter Notebooks}, }
@misc{beyer_daidalos_2024-1, title = {Daidalos: {NER} for {Literary} {Studies} on {Latin} and {Ancient} {Greek} {Texts}}, shorttitle = {Daidalos}, url = {https://zenodo.org/records/12582628}, abstract = {The talk was given at the Conference "Nomina Omina" in Leipzig.}, language = {eng}, urldate = {2024-10-31}, author = {Beyer, Andrea}, month = jun, year = {2024}, doi = {10.5281/zenodo.12582628}, keywords = {Classics, Literature studies, NER, NLP}, }
@misc{beyer_fach-_2024, title = {Fach- und fallspezifische {KI}-{Bildung} in den {Geisteswissenschaften}}, url = {https://zenodo.org/records/13757021}, abstract = {Die Präsentation „Fach- und fallspezifische KI-Bildung in den Geisteswissenschaften“ beleuchtet die Bedeutung einer gezielten KI-Ausbildung für die Geisteswissenschaften. Im Zentrum steht die Notwendigkeit, KI-Kompetenzen zu entwickeln, um den Herausforderungen der digitalen Transformation in Forschung und Lehre gerecht zu werden. Die Einführung stellt dar, warum KI-Bildung für eine informierte Auseinandersetzung mit gesellschaftlichen und wissenschaftlichen Fragen unerlässlich ist, etwa bei der Erkennung von Bias, der Nutzung von KI-Tools im Alltag oder der Analyse von Deepfakes. Es wird ein mehrstufiges Konzept der KI-Bildung vorgestellt, das sich an Lernende ohne informatische Vorkenntnisse richtet und Fach- sowie Fallbeispiele aus den Geisteswissenschaften nutzt, um den praktischen Nutzen von KI-Anwendungen zu verdeutlichen. Der Schwerpunkt liegt auf der Vermittlung von AI Literacy, Data Literacy und Digital Literacy auf verschiedenen Kompetenzstufen – von Anfänger bis Experte. Dabei wird verdeutlicht, wie diese Kompetenzen gezielt für die Analyse historischer und literarischer Fragestellungen eingesetzt werden können. Abschließend wird die Rolle der KI-Bildung sowohl für Lehrende als auch für Forschende und Studierende in den Geisteswissenschaften betont.}, language = {deu}, urldate = {2024-10-31}, author = {Beyer, Andrea and Schulz, Konstantin}, month = sep, year = {2024}, doi = {10.5281/zenodo.13757021}, keywords = {Artificial intelligence, Classics, Educational sciences, Languages and literature, Natural language processing}, }
@misc{schulz_einfuhrung_2024, title = {Einführung in {Natural} {Language} {Processing} anhand von {Plinius}' {Brief} 1,8}, url = {https://zenodo.org/records/13907150}, abstract = {Die Präsentation behandelt die methodische Analyse von Plinius' Briefen, insbesondere die Herausforderungen der Selbstdarstellung in epist. 1,8. Verschiedene linguistische und computerlinguistische Verfahren werden eingesetzt, um den Text digital zu untersuchen. Zunächst wird die Lemmatisierung eingeführt, die die Reduzierung flektierter Formen auf ihre Grundform beschreibt. Dadurch wird eine bessere Durchsuchbarkeit und Vergleichbarkeit von Texten ermöglicht. Darauf folgt das Part-of-Speech-Tagging, bei dem Wortarten im Text annotiert werden. Probleme wie die Mehrdeutigkeit und Schwierigkeiten in den Trainingsdaten des Algorithmus werden hervorgehoben. Zudem wird eine Sentimentanalyse durchgeführt, die emotionale Bewertungen und Meinungen im Text identifiziert. Ferner kommen Word Embeddings zum Einsatz, um semantische Beziehungen zwischen Wörtern darzustellen. Abschließend wird die Syntax des Plinius-Briefs mittels Treebanking, also der systematischen Erfassung syntaktischer Abhängigkeiten zwischen Wörtern in einem Satz, analysiert. Der Vortrag zeigt, wie digitale Werkzeuge klassisch-philologische Fragestellungen unterstützen und neue Erkenntnisse über Plinius' Werk ermöglichen, insbesondere in Bezug auf die Darstellung von Ruhm, Bescheidenheit und Selbstreflexion.}, language = {deu}, urldate = {2024-10-09}, author = {Schulz, Konstantin}, month = may, year = {2024}, doi = {10.5281/zenodo.13907150}, keywords = {Classics, Computational Linguistics, Languages and literature, Latin, Natural Language Processing}, }
@misc{beyer_digitale_2024, title = {Digitale {Methoden} in der {Klassischen} {Philologie}}, url = {https://zenodo.org/records/10529746}, abstract = {Folien zum Vortrag als Gastbeitrag in der Vorlesung »Grundfragen der lateinischen Literatur« an der Katholischen Universität Eichstätt-Ingolstadt}, language = {deu}, urldate = {2024-03-18}, author = {Beyer, Andrea and Schulz, Konstantin}, month = jan, year = {2024}, doi = {10.5281/zenodo.10529746}, keywords = {Classics, computational literary studies, digital humanities, natural language processing}, }
@misc{beyer_generative_2024, title = {Generative {KI} und ihre {Bedeutung} für {Bewertungskontexte}}, url = {https://zenodo.org/records/10569149}, abstract = {Präsentation zu einer Fortbildungsveranstaltung zum Thema generative KI und Leistungsmessung an einem Gymnasium in NRW, 29.1.2024.}, language = {deu}, urldate = {2024-03-18}, author = {Beyer, Andrea and Schulz, Konstantin}, month = jan, year = {2024}, doi = {10.5281/zenodo.10569149}, keywords = {AI, AI Literacy, KI Ethik, KI und Prüfungen, KI-Bildung, generative KI}, }
@misc{beyer_nlp-infrastruktur_2024-1, title = {{NLP}-{Infrastruktur} für die {Klassische} {Philologie}}, url = {https://zenodo.org/records/10474686}, abstract = {Folien zum Vortrag im Kolloquium "Phänomenologie der Digital Humanities" des Lehrstuhls für Digital Humanities an der Freien Universität Berlin: https://wikis.fu-berlin.de/display/phaenodh}, language = {deu}, urldate = {2024-03-18}, author = {Beyer, Andrea and Schulz, Konstantin}, month = jan, year = {2024}, doi = {10.5281/zenodo.10474686}, keywords = {Classics, computational literary studies, natural language processing, research infrastructure}, }
@misc{beyer_nlp-methoden_2024, title = {{NLP}-{Methoden} in der {Klassischen} {Philologie}: {Word} {Embeddings}}, shorttitle = {{NLP}-{Methoden} in der {Klassischen} {Philologie}}, url = {https://zenodo.org/records/11582358}, abstract = {Folien zu einem Vortrag}, language = {deu}, urldate = {2024-06-12}, author = {Beyer, Andrea and Schulz, Konstantin}, month = jun, year = {2024}, doi = {10.5281/zenodo.11582358}, keywords = {Artificial intelligence, Classics, Computer and information sciences, Languages and literature, Linguistics, Representation Learning, Word Embeddings}, }
@misc{schulz_kunstliche_2024, title = {Künstliche {Intelligenz} in der {Sprachverarbeitung}}, url = {https://zenodo.org/records/11190250}, abstract = {Folien zu einem Vortrag}, language = {deu}, urldate = {2024-05-14}, author = {Schulz, Konstantin}, month = may, year = {2024}, doi = {10.5281/zenodo.11190250}, keywords = {Computer and information sciences, Languages and literature, Linguistics}, }
@misc{beyer_reflexion_2024, title = {Reflexion mit und über {KI} im {AU}}, url = {https://zenodo.org/records/10909593}, abstract = {Folien zum Arbeitskreis beim Bundeskongress des Deutschen Altphilologenverbandes 2024 in Wuppertal}, language = {deu}, urldate = {2024-04-02}, author = {Beyer, Andrea and Schulz, Konstantin}, month = apr, year = {2024}, doi = {10.5281/zenodo.10909593}, keywords = {AI Literacy, Artificial intelligence, Classics, Second Language Acquisition}, }
@misc{faltin_automatisierte_2024, title = {Automatisierte {Kategorisierung} mittellateinischer {Bittbriefe} an den {Papst}}, url = {https://zenodo.org/records/13628818}, abstract = {Die Präsentation ist im Rahmen der 27. Aquilonia an der Christian-Albrechts-Universität zu Kiel entstanden.}, urldate = {2024-09-04}, author = {Faltin, Nico and Schulz, Konstantin}, month = jul, year = {2024}, doi = {10.5281/zenodo.13628818}, }
@misc{schulz_seflag_2024, title = {{SEFLAG}. {Systematic} {Evaluation} {Framework} for {NLP} {Models} and {Datasets} in {Latin} and {Ancient} {Greek}}, url = {https://zenodo.org/records/14012948}, abstract = {Das SEFLAG-Framework, präsentiert von Konstantin Schulz und Florian Deichsler (Humboldt-Universität zu Berlin), stellt ein systematisches Evaluationsframework für NLP-Modelle und -Datensätze für Latein und Altgriechisch vor. Die Hauptziele sind die Bewertung und Dokumentation bestehender NLP-Ressourcen, die Auswahl passender Modelle für Forschungsvorhaben sowie die Förderung der Interoperabilität durch standardisierte Annotationsrichtlinien. Diese Initiative reagiert auf den steigenden Einsatz von NLP in der Literaturwissenschaft historischer Sprachen und die damit verbundenen Herausforderungen, wie die Diversität an Modellen, die fehlende zentrale Verwaltung von Ressourcen und die Notwendigkeit einer systematischen Evaluation. SEFLAG konzentriert sich zunächst auf Lemmatisierung und Named Entity Recognition (NER) und plant zukünftig die Erweiterung um weitere NLP-Tasks. Die Ergebnisse werden auf Plattformen wie Hugging Face veröffentlicht und zielen darauf ab, der Forschungsgemeinschaft Zeit und Ressourcen zu sparen. Durch Dokumentation und Benchmarking wird eine nachhaltige Infrastruktur geschaffen, die Forschenden in den Bereichen Philologie, Geschichte und Archäologie zugutekommt und Innovationen im Bereich der historischen NLP-Methoden fördert.}, language = {deu}, urldate = {2024-10-31}, author = {Schulz, Konstantin}, month = oct, year = {2024}, doi = {10.5281/zenodo.14012948}, keywords = {Artificial intelligence, Computer and information sciences, Languages and literature, Linguistics, Natural language processing}, }
@misc{beyer_genki_2024, title = {{GenKI} im {Lateinunterricht} – {Texte} im {Spracherwerb}}, url = {https://zenodo.org/records/13902476}, abstract = {Generative KI (genKI) im Lateinunterricht (LU) beschäftigt nicht nur die einzelnen Lehrenden, sondern auch die Fortbildungsplaner. In diesem Vortrag wird eine sehr knappe Einführung zum Begriff generative KI und Prompting geboten. Kern der Fortbildung sind Anwendungsszenarien von genKI bei der Textarbeit im Lateinunterricht. Von besonderem Interesse sind die lateinischen, didaktisierten Texte im Rahmen des Spracherwerbs.}, language = {deu}, urldate = {2024-10-31}, author = {Beyer, Andrea}, month = oct, year = {2024}, doi = {10.5281/zenodo.13902476}, keywords = {Latin classes, generative AI, language acquisition}, }
@article{kuehnast_development_2024, title = {Development of basic reading skills in {Latin}: a corpus-based tool for computer-assisted fluency training}, volume = {11}, issn = {2331-186X}, shorttitle = {Development of basic reading skills in {Latin}}, url = {https://doi.org/10.1080/2331186X.2024.2416819}, doi = {10.1080/2331186X.2024.2416819}, abstract = {The present paper evaluates the processes of reading acquisition in Latin from the component-skills approach and discusses how advances in reading in modern foreign languages could be adapted to the specific needs of Latin as a historical language. Compared to the holistic and socially embedded approaches to modern foreign language acquisition, the grammar-translation method traditionally used in schools shows considerable weaknesses in the development of basic reading skills in Latin. Therefore, we address the possible advantages of corpus-based teaching strategies and present Machina Callida, a psycholinguistically informed e-tutor suitable for supporting Latin vocabulary acquisition and reading comprehension at beginner and intermediate levels. Using digital corpora of original Latin texts, the application semi-automatically generates contextualized vocabulary exercises tailored to the needs of different groups of learners. 
Through its integration with the research data repository Zenodo, Machina Callida supports online collaboration in the creation and distribution of open educational resources through crowdsourcing.}, number = {1}, urldate = {2024-10-22}, journal = {Cogent Education}, author = {Kuehnast, Milena and Schulz, Konstantin and Lüdeling, Anke}, month = dec, year = {2024}, note = {Publisher: Cogent OA \_eprint: https://doi.org/10.1080/2331186X.2024.2416819}, keywords = {CALL, Classical Language \& Literature, Classroom Practice, Databases, Language \& Linguistics, Language Teaching \& Learning, Latin, Open \& Distance Education and eLearning, Teaching \& Learning - Education, corpus resources, reading comprehension, vocabulary acquisition}, pages = {2416819}, }
@misc{sun_lalaeval_2024, title = {{LalaEval}: {A} {Holistic} {Human} {Evaluation} {Framework} for {Domain}-{Specific} {Large} {Language} {Models}}, shorttitle = {{LalaEval}}, url = {http://arxiv.org/abs/2408.13338}, doi = {10.48550/arXiv.2408.13338}, abstract = {This paper introduces LalaEval, a holistic framework designed for the human evaluation of domain-specific large language models (LLMs). LalaEval proposes a comprehensive suite of end-to-end protocols that cover five main components including domain specification, criteria establishment, benchmark dataset creation, construction of evaluation rubrics, and thorough analysis and interpretation of evaluation outcomes. This initiative aims to fill a crucial research gap by providing a systematic methodology for conducting standardized human evaluations within specific domains, a practice that, despite its widespread application, lacks substantial coverage in the literature and human evaluation are often criticized to be less reliable due to subjective factors, so standardized procedures adapted to the nuanced requirements of specific domains or even individual organizations are in great need. Furthermore, the paper demonstrates the framework's application within the logistics industry, presenting domain-specific evaluation benchmarks, datasets, and a comparative analysis of LLMs for the logistics domain use, highlighting the framework's capacity to elucidate performance differences and guide model selection and development for domain-specific LLMs. 
Through real-world deployment, the paper underscores the framework's effectiveness in advancing the field of domain-specific LLM evaluation, thereby contributing significantly to the ongoing discussion on LLMs' practical utility and performance in domain-specific applications.}, urldate = {2024-09-03}, publisher = {arXiv}, author = {Sun, Chongyan and Lin, Ken and Wang, Shiwei and Wu, Hulong and Fu, Chengfei and Wang, Zhen}, month = aug, year = {2024}, note = {arXiv:2408.13338 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Human-Computer Interaction}, }
@misc{liang_whats_2024, title = {What's documented in {AI}? {Systematic} {Analysis} of {32K} {AI} {Model} {Cards}}, shorttitle = {What's documented in {AI}?}, url = {http://arxiv.org/abs/2402.05160}, doi = {10.48550/arXiv.2402.05160}, abstract = {The rapid proliferation of AI models has underscored the importance of thorough documentation, as it enables users to understand, trust, and effectively utilize these models in various applications. Although developers are encouraged to produce model cards, it's not clear how much information or what information these cards contain. In this study, we conduct a comprehensive analysis of 32,111 AI model documentations on Hugging Face, a leading platform for distributing and deploying AI models. Our investigation sheds light on the prevailing model card documentation practices. Most of the AI models with substantial downloads provide model cards, though the cards have uneven informativeness. We find that sections addressing environmental impact, limitations, and evaluation exhibit the lowest filled-out rates, while the training section is the most consistently filled-out. We analyze the content of each section to characterize practitioners' priorities. Interestingly, there are substantial discussions of data, sometimes with equal or even greater emphasis than the model itself. To evaluate the impact of model cards, we conducted an intervention study by adding detailed model cards to 42 popular models which had no or sparse model cards previously. We find that adding model cards is moderately correlated with an increase weekly download rates. 
Our study opens up a new perspective for analyzing community norms and practices for model documentation through large-scale data science and linguistics analysis.}, urldate = {2024-09-03}, publisher = {arXiv}, author = {Liang, Weixin and Rajani, Nazneen and Yang, Xinyu and Ozoani, Ezinwanne and Wu, Eric and Chen, Yiqun and Smith, Daniel Scott and Zou, James}, month = feb, year = {2024}, note = {arXiv:2402.05160 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Computer Science - Software Engineering}, }
@misc{liu_automatic_2024, title = {Automatic {Generation} of {Model} and {Data} {Cards}: {A} {Step} {Towards} {Responsible} {AI}}, shorttitle = {Automatic {Generation} of {Model} and {Data} {Cards}}, url = {https://arxiv.org/abs/2405.06258v2}, abstract = {In an era of model and data proliferation in machine learning/AI especially marked by the rapid advancement of open-sourced technologies, there arises a critical need for standardized consistent documentation. Our work addresses the information incompleteness in current human-generated model and data cards. We propose an automated generation approach using Large Language Models (LLMs). Our key contributions include the establishment of CardBench, a comprehensive dataset aggregated from over 4.8k model cards and 1.4k data cards, coupled with the development of the CardGen pipeline comprising a two-step retrieval process. Our approach exhibits enhanced completeness, objectivity, and faithfulness in generated model and data cards, a significant step in responsible AI documentation practices ensuring better accountability and traceability.}, language = {en}, urldate = {2024-09-03}, journal = {arXiv.org}, author = {Liu, Jiarui and Li, Wenkai and Jin, Zhijing and Diab, Mona}, month = may, year = {2024}, }
@inproceedings{martinelli_exploring_2024, title = {Exploring {Neural} {Topic} {Modeling} on a {Classical} {Latin} {Corpus}}, author = {Martinelli, Ginevra and Impicciché, Paola and Fersini, Elisabetta and Mambrini, Francesco and Passarotti, Marco}, year = {2024}, pages = {6929--6934}, }
@article{stopponi_agree_2024, title = {{AGREE}: a new benchmark for the evaluation of distributional semantic models of ancient {Greek}}, shorttitle = {{AGREE}}, url = {https://research.rug.nl/en/publications/agree-a-new-benchmark-for-the-evaluation-of-distributional-semant}, urldate = {2024-04-05}, journal = {Digital Scholarship in the Humanities}, author = {Stopponi, Silvia and Peels-Matthey, Saskia and Nissim, Malvina}, year = {2024}, note = {Publisher: Oxford University Press}, }
@article{stopponi_natural_2024, title = {Natural {Language} {Processing} for {Ancient} {Greek}: {Design}, advantages and challenges of language models}, issn = {0176-4225}, journal = {Diachronica}, author = {Stopponi, Silvia and Pedrazzini, Nilo and Peels-Matthey, Saskia and McGillivray, Barbara and Nissim, Malvina}, year = {2024}, note = {Publisher: John Benjamins Publishing Company Amsterdam/Philadelphia}, }
@inproceedings{stussi_part--speech_2024, title = {Part-of-{Speech} {Tagging} of 16th-{Century} {Latin} with {GPT}}, url = {https://aclanthology.org/2024.latechclfl-1.18.pdf}, booktitle = {Proceedings of the 8th {Joint} {SIGHUM} {Workshop} on {Computational} {Linguistics} for {Cultural} {Heritage}, {Social} {Sciences}, {Humanities} and {Literature} ({LaTeCH}-{CLfL} 2024)}, author = {Stüssi, Elina and Ströbel, Phillip}, year = {2024}, pages = {196--206}, }
@misc{noauthor_digital_2024, type = {online resource}, title = {Digital {Tools} for {Learning} {Ancient} {Greek} and {Latin} and {Guiding} {Phrases} for {Using} {Generative} {AI} in {Ancient} {Language} {Study}}, url = {https://figshare.com/articles/online_resource/Digital_Tools_for_Learning_Ancient_Greek_and_Latin_and_Guiding_Phrases_for_Using_Generative_AI_in_Ancient_Language_Study/25391782/3}, abstract = {This document is a short introductory guide to the digital tools available for supporting the study of Ancient Greek and Latin. The first part of this guide is a list of our preferred digital tools for supporting Ancient Greek and Latin learning. This list is not exhaustive, but it does include a variety of generative AI tools and their ideal uses for supporting ancient language learning. The second part of this guide is a series of pre-prepared prompts which can be copy-pasted into a conversational AI tool to guide the conversation towards your expected learning level. Before using generative AI to support your studies, make sure to take a look at some of our instructional videos about the ethics of using generative AI. \#STOPandTHINKbeforeyouGENERATE}, language = {en}, urldate = {2024-05-21}, journal = {figshare}, month = mar, year = {2024}, doi = {10.6084/m9.figshare.25391782.v3}, note = {Publisher: figshare}, }
@inproceedings{sprugnoli_overview_2024, title = {Overview of the {EvaLatin} 2024 evaluation campaign}, url = {https://aclanthology.org/2024.lt4hala-1.21.pdf}, booktitle = {Proceedings of the {Third} {Workshop} on {Language} {Technologies} for {Historical} and {Ancient} {Languages} ({LT4HALA})@ {LREC}-{COLING}-2024}, author = {Sprugnoli, Rachele and Iurescia, Federica and Passarotti, Marco}, year = {2024}, pages = {190--197}, }
@misc{krause_graphannis_2024, title = {{graphANNIS}}, copyright = {Apache-2.0}, url = {https://github.com/korpling/graphANNIS}, abstract = {This is a new backend implementation of the ANNIS linguistic search and visualization system.}, urldate = {2024-02-27}, author = {Krause, Thomas}, month = feb, year = {2024}, doi = {10.5281/zenodo.2598164}, }
@inproceedings{beyer_data_2023, address = {Tübingen, Germany}, title = {Data {Literacy} für die {Klassische} {Philologie}: {dAIdalos} – eine interaktive {Infrastruktur} als {Lernangebot}}, shorttitle = {Data {Literacy} für die {Klassische} {Philologie}}, url = {https://zenodo.org/record/8420565}, doi = {10.5281/zenodo.8420565}, abstract = {Abstract für das gleichnamige Poster bei der Konferenz "FORGE 2023 - Forschungsdaten in den Geisteswissenschaften: Anything Goes?! Forschungsdaten in den Geisteswissenschaften - kritisch betrachtet": https://forge23.uni-tuebingen.de/}, language = {deu}, urldate = {2023-10-09}, publisher = {Zenodo}, author = {Beyer, Andrea and Schulz, Konstantin}, month = sep, year = {2023}, keywords = {Digital Classics, data literacy, open educational resources, research infrastructure}, }
@inproceedings{beersmans_training_2023, address = {Varna, Bulgaria}, title = {Training and {Evaluation} of {Named} {Entity} {Recognition} {Models} for {Classical} {Latin}}, url = {https://aclanthology.org/2023.alp-1.1.pdf}, abstract = {We evaluate the performance of various models on the task of named entity recognition (NER) for classical Latin. Using an existing dataset, we train two transformer-based LatinBERT models and one shallow conditional random field (CRF) model. The performance is assessed using both standard metrics and a detailed manual error analysis, and compared to the results obtained by different already released Latin NER tools. Both analyses demonstrate that the BERT models achieve a better f1-score than the other models. Furthermore, we annotate new, unseen data for further evaluation of the models, and we discuss the impact of annotation choices on the results.}, language = {English}, booktitle = {Proceedings of the {Ancient} {Language} {Processing} {Workshop}}, publisher = {INCOMA Ltd.}, author = {Beersmans, Marijke and de Graaf, Evelien and Van de Cruys, Tim and Fantoli, Margherita}, year = {2023}, pages = {1--12}, }
@phdthesis{yousef_translation_2023, address = {Leipzig}, title = {Translation {Alignment} {Applied} to {Historical} {Languages}: methods, evaluation, applications, and visualization}, shorttitle = {Translation {Alignment} {Applied} to {Historical} {Languages}}, url = {https://nbn-resolving.org/urn:nbn:de:bsz:15-qucosa2-864719}, school = {Universität Leipzig}, author = {Yousef, Tariq}, year = {2023}, note = {https://ul.qucosa.de/api/qucosa\%3A86471/attachment/ATT-0/}, }
@inproceedings{yousef_transformer-based_2023, address = {Graz}, title = {Transformer-{Based} {Named} {Entity} {Recognition} for {Ancient} {Greek}}, url = {https://zenodo.org/records/8107629}, abstract = {This paper presents our work on training two automatic NER models for ancient Greek using transformer-based models. The models classify the entities into three categories, namely, Person, Location, and Miscellaneous and achieved promising results on test and evaluation datasets.}, language = {eng}, urldate = {2024-01-04}, booktitle = {Digital {Humanities} 2023: {Book} of {Abstracts}}, author = {Yousef, Tariq and Palladino, Chiara and Jänicke, Stefan}, year = {2023}, keywords = {Ancient Greek, Computer science, Cultural studies, Humanities computing, Linguistics, Long Presentation, Named Entities Recognition, Paper, Transformer models, and methods, annotation structures, natural language processing, systems}, pages = {420--422}, }
@inproceedings{yousef_classical_2023, address = {Varna, Bulgaria}, title = {Classical {Philology} in the {Time} of {AI}: {Exploring} the {Potential} of {Parallel} {Corpora} in {Ancient} {Language}}, shorttitle = {Classical {Philology} in the {Time} of {AI}}, url = {https://aclanthology.org/2023.alp-1.21.pdf}, abstract = {This paper provides an overview of diverse applications of parallel corpora in ancient languages, particularly Ancient Greek. In the first part, we provide the fundamental principles of parallel corpora and a short overview of their applications in the study of ancient texts. In the second part, we illustrate how to leverage on parallel corpora to perform various NLP tasks, including automatic translation alignment, dynamic lexica induction, and Named Entity Recognition. In the conclusions, we emphasize current limitations and future work.}, urldate = {2025-01-26}, booktitle = {Proceedings of the {Ancient} {Language} {Processing} {Workshop} associated with {RANLP}-2023}, author = {Yousef, Tariq and Palladino, Chiara and Shamsian, Farnoosh}, year = {2023}, pages = {179--192}, }
@inproceedings{berti_named_2023, address = {Graz}, title = {Named {Entity} {Recognition} for a {Text}-{Based} {Catalog} of {Ancient} {Greek} {Authors} and {Works}}, copyright = {Creative Commons Attribution 4.0 International, Open Access}, url = {https://zenodo.org/record/8108058}, doi = {10.5281/ZENODO.8108058}, abstract = {This poster proposal presents a project whose results are the linguistic annotation of ancient Greek bibliographic references with a focus on Named Entity Recognition related to author names and work titles, in order to produce new dynamic text-based tools that are not available in existing indices and catalogs.}, language = {en}, urldate = {2025-01-26}, booktitle = {Digital {Humanities} 2023: {Book} of {Abstracts}}, publisher = {Zenodo}, author = {Berti, Monica}, year = {2023}, keywords = {CITE Architecture, FOS: Languages and literature, Library \& information science, Linguistics, Literary studies, Paper, Philology, Poster, analysis, ancient Greek, and methods, concordancing and indexing, digital classics, digital philology, linked (open) data, literary canon, natural language processing, scholarly editing and editions development}, pages = {557}, }
@article{laupichler_evaluating_2023, title = {Evaluating {AI} {Courses}: {A} {Valid} and {Reliable} {Instrument} for {Assessing} {Artificial}-{Intelligence} {Learning} through {Comparative} {Self}-{Assessment}}, volume = {13}, copyright = {http://creativecommons.org/licenses/by/3.0/}, issn = {2227-7102}, shorttitle = {Evaluating {AI} {Courses}}, url = {https://www.mdpi.com/2227-7102/13/10/978}, doi = {10.3390/educsci13100978}, abstract = {A growing number of courses seek to increase the basic artificial-intelligence skills (“AI literacy”) of their participants. At this time, there is no valid and reliable measurement tool that can be used to assess AI-learning gains. However, the existence of such a tool would be important to enable quality assurance and comparability. In this study, a validated AI-literacy-assessment instrument, the “scale for the assessment of non-experts’ AI literacy” (SNAIL) was adapted and used to evaluate an undergraduate AI course. We investigated whether the scale can be used to reliably evaluate AI courses and whether mediator variables, such as attitudes toward AI or participation in other AI courses, had an influence on learning gains. In addition to the traditional mean comparisons (i.e., t-tests), the comparative self-assessment (CSA) gain was calculated, which allowed for a more meaningful assessment of the increase in AI literacy. We found preliminary evidence that the adapted SNAIL questionnaire enables a valid evaluation of AI-learning gains. 
In particular, distinctions among different subconstructs and the differentiation constructs, such as attitudes toward AI, seem to be possible with the help of the SNAIL questionnaire.}, language = {en}, number = {10}, urldate = {2025-01-21}, journal = {Education Sciences}, author = {Laupichler, Matthias Carl and Aster, Alexandra and Perschewski, Jan-Ole and Schleiss, Johannes}, month = oct, year = {2023}, note = {Number: 10 Publisher: Multidisciplinary Digital Publishing Institute}, keywords = {AI literacy, AI-literacy scale, artificial intelligence education, assessment, comparative self-assessment, course evaluation}, pages = {978}, }
@misc{xi_rise_2023, title = {The {Rise} and {Potential} of {Large} {Language} {Model} {Based} {Agents}: {A} {Survey}}, shorttitle = {The {Rise} and {Potential} of {Large} {Language} {Model} {Based} {Agents}}, url = {http://arxiv.org/abs/2309.07864}, doi = {10.48550/arXiv.2309.07864}, abstract = {For a long time, humanity has pursued artificial intelligence (AI) equivalent to or surpassing the human level, with AI agents considered a promising vehicle for this pursuit. AI agents are artificial entities that sense their environment, make decisions, and take actions. Many efforts have been made to develop intelligent agents, but they mainly focus on advancement in algorithms or training strategies to enhance specific capabilities or performance on particular tasks. Actually, what the community lacks is a general and powerful model to serve as a starting point for designing AI agents that can adapt to diverse scenarios. Due to the versatile capabilities they demonstrate, large language models (LLMs) are regarded as potential sparks for Artificial General Intelligence (AGI), offering hope for building general AI agents. Many researchers have leveraged LLMs as the foundation to build AI agents and have achieved significant progress. In this paper, we perform a comprehensive survey on LLM-based agents. We start by tracing the concept of agents from its philosophical origins to its development in AI, and explain why LLMs are suitable foundations for agents. Building upon this, we present a general framework for LLM-based agents, comprising three main components: brain, perception, and action, and the framework can be tailored for different applications. Subsequently, we explore the extensive applications of LLM-based agents in three aspects: single-agent scenarios, multi-agent scenarios, and human-agent cooperation. 
Following this, we delve into agent societies, exploring the behavior and personality of LLM-based agents, the social phenomena that emerge from an agent society, and the insights they offer for human society. Finally, we discuss several key topics and open problems within the field. A repository for the related papers at https://github.com/WooooDyy/LLM-Agent-Paper-List.}, urldate = {2025-01-15}, publisher = {arXiv}, author = {Xi, Zhiheng and Chen, Wenxiang and Guo, Xin and He, Wei and Ding, Yiwen and Hong, Boyang and Zhang, Ming and Wang, Junzhe and Jin, Senjie and Zhou, Enyu and Zheng, Rui and Fan, Xiaoran and Wang, Xiao and Xiong, Limao and Zhou, Yuhao and Wang, Weiran and Jiang, Changhao and Zou, Yicheng and Liu, Xiangyang and Yin, Zhangyue and Dou, Shihan and Weng, Rongxiang and Cheng, Wensen and Zhang, Qi and Qin, Wenjuan and Zheng, Yongyan and Qiu, Xipeng and Huang, Xuanjing and Gui, Tao}, month = sep, year = {2023}, note = {arXiv:2309.07864 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language}, }
@article{yousef_named_2023, title = {Named {Entity} {Annotation} {Projection} {Applied} to {Classical} {Languages}}, url = {https://aclanthology.org/2023.latechclfl-1.19.pdf}, abstract = {In this study, we demonstrate how to apply cross-lingual annotation projection to transfer named-entity annotations to classical languages for which limited or no resources and annotated texts are available, aiming to enrich their NER training datasets and train a model to perform NER tagging. Our approach employs sentence-level aligned corpora of ancient texts and the translation in a modern language, for which high-quality off-the-shelf NER systems are available. We automatically annotate the text of the modern language and employ a stateof-the-art neural word alignment system to find translation equivalents. Finally, we transfer the annotations to the corresponding tokens in the ancient texts using a direct projection heuristic. We applied our method to ancient Greek and Latin using the Bible with the English translation as a parallel corpus. We used the resulting annotations to enhance the performance of an existing NER model for ancient Greek.}, language = {en}, journal = {Proceedings of the 7th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature}, author = {Yousef, Tariq and Palladino, Chiara and Heyer, Gerhard and Jänicke, Stefan}, year = {2023}, pages = {175--182}, }
@article{rebora_sentiment_2023, title = {Sentiment {Analysis} in {Literary} {Studies}. {A} {Critical} {Survey}}, volume = {17}, url = {https://digitalhumanities.org/dhq/vol/17/2/000691/000691.html}, number = {2}, journal = {Digital Humanities Quarterly}, author = {Rebora, Simone}, year = {2023}, }
@article{sprugnoli_sentiment_2023, title = {The {Sentiment} of {Latin} {Poetry}. {Annotation} and {Automatic} {Analysis} of the {Odes} of {Horace}}, volume = {9}, issn = {2499-4553}, url = {https://journals.openedition.org/ijcol/1125}, number = {9-1}, journal = {IJCoL. Italian Journal of Computational Linguistics}, author = {Sprugnoli, Rachele and Mambrini, Francesco and Passarotti, Marco and Moretti, Giovanni}, year = {2023}, note = {Publisher: Accademia University Press}, pages = {53--71}, }
@misc{beyer_digitalgestutzte_2023, title = {Digitalgestützte {Textanalyse} in {Forschung} und {Lehre}}, url = {https://zenodo.org/records/8388745}, abstract = {Folien zum Vortrag bei der Konferenz "(Digitale) Chancen für den Lateinunterricht": https://www.altphil.uni-freiburg.de/termine/digilat2023}, language = {deu}, urldate = {2024-10-31}, author = {Beyer, Andrea}, month = sep, year = {2023}, doi = {10.5281/zenodo.8388745}, keywords = {Digital Classics, Digital Humanities, Latin language, language learning}, }
@misc{beyer_chatbot_2023, address = {Berlin, Germany}, title = {@chatbot: warum kannst du latein et quo vadis?}, shorttitle = {@chatbot}, url = {https://zenodo.org/record/8412875}, abstract = {Folien zum Vortrag bei der Fortbildung "@chatbot: warum kannst du latein et quo vadis?": http://www.davbb.de/veranstaltungen-fortbildungen/248-chatbot}, language = {deu}, urldate = {2023-10-06}, author = {Beyer, Andrea and Schulz, Konstantin}, month = oct, year = {2023}, doi = {10.5281/zenodo.8412875}, keywords = {Latin language, artificial intelligence, artificial intelligence literacy, language learning}, }
@inproceedings{beyer_daidalos_2023, address = {Berlin}, title = {{DAIdalos}: {Forschen} und {Lernen} zugleich?}, isbn = {978-3-88579-731-9}, shorttitle = {{DAIdalos}}, url = {https://dl.gi.de/handle/20.500.12116/43162}, doi = {10.18420/inf2023_42}, abstract = {Die Daidalos-Infrastruktur soll es Forschenden der Klassischen Philologie und verwandter Disziplinen ermöglichen, verschiedene Methoden des Natural Language Processing an selbst zusammengestellten Forschungskorpora anzuwenden. Dabei ist Daidalos als interaktive Lern- und Forschungsinfrastruktur konzipiert, die den Ausbau wesentlicher Teilfähigkeiten von Data Literacy, z. B. die Zusammenstellung und Analyse von Korpora oder den Umgang mit Annotationen, TEI-XML und graphischen Auswertungen, unterstützt.}, language = {de}, urldate = {2023-12-14}, publisher = {Gesellschaft für Informatik e.V.}, author = {Beyer, Andrea and Schulz, Konstantin}, year = {2023}, pages = {391--393}, }
@misc{beyer_daidalos_2023-1, title = {Daidalos: {Forschen} und {Lernen} zugleich? {Data} {Literacy} als {Lernaufgabe} für die {Klassisch}-philologische {Forschung}}, shorttitle = {Daidalos}, url = {https://zenodo.org/record/8388900}, abstract = {Poster zur Präsentation des Daidalos-Projekts beim "Workshop KI-Bildung. Ein Workshop zu Aus- und Weiterbildung über Künstliche Intelligenz im Rahmen der GI-Tagung INFORMATIK 2023" an der HTW Berlin}, language = {deu}, urldate = {2023-09-29}, author = {Beyer, Andrea and Schulz, Konstantin}, month = sep, year = {2023}, doi = {10.5281/zenodo.8388900}, keywords = {Digital Classics, data literacy, natural language processing, research infrastructure}, }
@misc{beyer_ki-bildung_2023, address = {Bielefeld, Germany}, title = {{KI}-{Bildung}: {Was}, warum und wie?}, shorttitle = {{KI}-{Bildung}}, url = {https://zenodo.org/record/8381513}, abstract = {Folien zum Vortrag bei der Fachtagung "Zwischen Wachstafel und ChatGPT – KI im Lateinunterricht": https://www.uni-bielefeld.de/fakultaeten/linguistik-literaturwissenschaft/studium-lehre/faecher/latein/projekte/fachtagung-ki-im-lateinun/index.xml}, language = {deu}, urldate = {2023-09-29}, author = {Beyer, Andrea and Schulz, Konstantin}, month = sep, year = {2023}, doi = {10.5281/zenodo.8381513}, keywords = {Latin language, artificial intelligence, artificial intelligence literacy, language learning}, }
@misc{beyer_mit_2023, address = {Bielefeld, Germany}, title = {Mit und über {KI}-{Tools} im {Literaturunterricht} reflektieren}, url = {https://zenodo.org/record/8388817}, abstract = {Folien zum Workshop bei der Fachtagung "Zwischen Wachstafel und ChatGPT – KI im Lateinunterricht": https://www.uni-bielefeld.de/fakultaeten/linguistik-literaturwissenschaft/studium-lehre/faecher/latein/projekte/fachtagung-ki-im-lateinun/index.xml}, language = {deu}, urldate = {2023-09-29}, author = {Beyer, Andrea and Schulz, Konstantin}, month = sep, year = {2023}, doi = {10.5281/zenodo.8388817}, keywords = {Latin language, artificial intelligence, artificial intelligence literacy, language learning}, }
@misc{beyer_data_2023, title = {Data {Literacy} für die {Klassische} {Philologie} - {dAIdalos} - eine interaktive {Infrastruktur} als {Lernangebot}}, url = {https://zenodo.org/record/8392485}, abstract = {Das Poster informiert über das DFG-geförderte explorative Entwicklungsvorhaben Daidalos, das es Forschenden der Klassischen Philologie und verwandter Disziplinen ermöglichen soll, verschiedene Methoden des Natural Language Processing (NLP) an selbst zusammengestellten Forschungskorpora anzuwenden. Dabei ist Daidalos als interaktive Forschungsinfrastruktur konzipiert, die zugleich den Ausbau wesentlicher Teilfähigkeiten von Data Literacy, z. B. die Zusammenstellung und Analyse von Korpora oder den Umgang mit Annotationen, TEI-XML und graphischen Auswertungen, unterstützt. Hierzu sind vor allem forschungsorientierte, didaktische Lernbausteine und deren Implementierung in die Infrastruktur angedacht, um ein fach- und forschungsbezogenes Lernen zu ermöglichen.}, urldate = {2023-10-09}, author = {Beyer, Andrea and Schulz, Konstantin}, month = sep, year = {2023}, doi = {10.5281/zenodo.8392485}, keywords = {Data Literacy, FORGE2023, JupyterLab, KI-Didaktik, NLP}, }
@article{hambarde_information_2023, title = {Information {Retrieval}: {Recent} {Advances} and {Beyond}}, volume = {11}, issn = {2169-3536}, shorttitle = {Information {Retrieval}}, url = {http://arxiv.org/abs/2301.08801}, doi = {10.1109/ACCESS.2023.3295776}, abstract = {In this paper, we provide a detailed overview of the models used for information retrieval in the first and second stages of the typical processing chain. We discuss the current state-of-the-art models, including methods based on terms, semantic retrieval, and neural. Additionally, we delve into the key topics related to the learning process of these models. This way, this survey offers a comprehensive understanding of the field and is of interest for researchers and practitioners entering/working in the information retrieval domain.}, urldate = {2024-09-03}, journal = {IEEE Access}, author = {Hambarde, Kailash A. and Proenca, Hugo}, year = {2023}, note = {arXiv:2301.08801 [cs]}, keywords = {Computer Science - Information Retrieval}, pages = {76581--76604}, }
@inproceedings{celano_neural_2023, title = {A {Neural} {Network} {Approach} to {Ellipsis} {Detection} in {Ancient} {Greek}}, author = {Celano, Giuseppe GA}, year = {2023}, pages = {151--158}, }
@inproceedings{beersmans_training_2023, title = {Training and {Evaluation} of {Named} {Entity} {Recognition} {Models} for {Classical} {Latin}}, url = {https://zenodo.org/doi/10.5281/zenodo.8337363}, author = {Beersmans, Marijke and de Graaf, Evelien and Van de Cruys, Tim and Fantoli, Margherita}, year = {2023}, pages = {1--12}, }
@inproceedings{berti_named_2023, title = {Named {Entity} {Recognition} for a {Text}-{Based} {Catalog} of {Ancient} {Greek} {Authors} and {Works}}, url = {https://www.academia.edu/download/107756648/BERTI_Monica_Named_Entity_Recognition_for_a_Text_Based_Catal.pdf}, author = {Berti, Monica}, year = {2023}, }
@article{ross_new_2023, title = {A {New} {Frontier}: {AI} and {Ancient} {Language} {Pedagogy}}, volume = {24}, issn = {2058-6310}, shorttitle = {A {New} {Frontier}}, url = {https://www.cambridge.org/core/journals/journal-of-classics-teaching/article/new-frontier-ai-and-ancient-language-pedagogy/A63EF69F5FE5529F0F45FB1EB655A9F7}, doi = {10.1017/S2058631023000430}, abstract = {In November 2022, ChatGPT 3.5 was released on a public research preview, gaining notoriety for its ability to pull from a vast body of information to create coherent and digestible bodies of text that accurately respond to queries (OpenAI, 2022). It is able to recognise the grammar and vocabulary of ancient languages, translate passages, and compose texts at an alarmingly accurate and rapid rate. For teachers, this AI has had mixed reviews. Some fear its ability to produce well-written work effortlessly, while others are excited by its abilities to push the boundaries of current teaching practices. This paper explores how well ChatGPT explains grammatical concepts, parses inflected forms, and translates Classical Latin, Ancient Greek, and Classical Sanskrit. Overall, ChatGPT is rather good at working with Classical Latin and Sanskrit, but its abilities with Ancient Greek are deeply problematic. Although it is quite flawed at this time, ChatGPT, when used properly, could become a useful a tool for ancient language study. With proper guiding phrases, students could use this AI to practise vocabulary, check their translations, and rephrase grammatical concepts.}, language = {en}, number = {48}, urldate = {2024-03-26}, journal = {Journal of Classics Teaching}, author = {Ross, Edward A. S.}, month = oct, year = {2023}, keywords = {Ancient Greek, Ancient Language Pedagogy, Artificial Intelligence, ChatGPT, Classical Latin, Classical Sanskrit, New Teaching Tools}, pages = {143--161}, }
@article{krahn_sentence_2023, title = {Sentence embedding models for {Ancient} {Greek} using multilingual knowledge distillation}, journal = {arXiv preprint arXiv:2308.13116}, author = {Krahn, Kevin and Tate, Derrick and Lamicela, Andrew C}, year = {2023}, }
@article{sansom_sedes_2023, title = {{SEDES}: {Metrical} {Position} in {Greek} {Hexameter}}, volume = {017}, issn = {1938-4122}, shorttitle = {{SEDES}}, url = {https://digitalhumanities.org/dhq/vol/17/2/000675/000675.html}, number = {2}, journal = {Digital Humanities Quarterly}, author = {Sansom, Stephen A. and Fifield, David}, month = may, year = {2023}, }
@article{stopponi_evaluation_2023, title = {Evaluation of {Distributional} {Semantic} {Models} of {Ancient} {Greek}: {Ancient} {Language} {Processing}}, shorttitle = {Evaluation of {Distributional} {Semantic} {Models} of {Ancient} {Greek}}, url = {https://pure.rug.nl/ws/portalfiles/portal/777728242/stopponi_et_al_2023_alp.pdf}, abstract = {We evaluate four count-based and predictive distributional semantic models of Ancient Greek against AGREE, a composite benchmark of human judgements, to assess their ability to retrieve semantic relatedness. On the basis of the observations deriving from the analysis of the results, we design a procedure for a largerscale intrinsic evaluation of count-based and predictive language models, including syntactic embeddings. We also propose possible ways of exploiting the different layers of the whole AGREE benchmark (including both humanand machine-generated data) and different evaluation metrics.}, journal = {Proceedings of the Ancient Language Processing Workshop}, author = {Stopponi, Silvia and Pedrazzini, Nilo and Peels-Matthey, Saskia and McGillivray, Barbara and Nissim, Malvina}, month = sep, year = {2023}, note = {Place: Varna, Bulgaria Publisher: Association for Computational Linguistics (ACL)}, keywords = {ancient greek, ancient languages, benchmark, evaluation, language models, natural language processing, word2vec}, pages = {49--58}, }
@misc{wang_gpt-ner_2023, title = {{GPT}-{NER}: {Named} {Entity} {Recognition} via {Large} {Language} {Models}}, shorttitle = {{GPT}-{NER}}, url = {http://arxiv.org/abs/2304.10428}, doi = {10.48550/arXiv.2304.10428}, abstract = {Despite the fact that large-scale Language Models (LLM) have achieved SOTA performances on a variety of NLP tasks, its performance on NER is still significantly below supervised baselines. This is due to the gap between the two tasks the NER and LLMs: the former is a sequence labeling task in nature while the latter is a text-generation model. In this paper, we propose GPT-NER to resolve this issue. GPT-NER bridges the gap by transforming the sequence labeling task to a generation task that can be easily adapted by LLMs e.g., the task of finding location entities in the input text "Columbus is a city" is transformed to generate the text sequence "@@Columbus\#\# is a city", where special tokens @@\#\# marks the entity to extract. To efficiently address the "hallucination" issue of LLMs, where LLMs have a strong inclination to over-confidently label NULL inputs as entities, we propose a self-verification strategy by prompting LLMs to ask itself whether the extracted entities belong to a labeled entity tag. We conduct experiments on five widely adopted NER datasets, and GPT-NER achieves comparable performances to fully supervised baselines, which is the first time as far as we are concerned. More importantly, we find that GPT-NER exhibits a greater ability in the low-resource and few-shot setups, when the amount of training data is extremely scarce, GPT-NER performs significantly better than supervised models. 
This demonstrates the capabilities of GPT-NER in real-world NER applications where the number of labeled examples is limited.}, urldate = {2024-06-21}, publisher = {arXiv}, author = {Wang, Shuhe and Sun, Xiaofei and Li, Xiaoya and Ouyang, Rongbin and Wu, Fei and Zhang, Tianwei and Li, Jiwei and Wang, Guoyin}, month = oct, year = {2023}, note = {arXiv:2304.10428 [cs]}, keywords = {Computer Science - Computation and Language}, }
@misc{myerston_grecy_2023, title = {{greCy}: {Ancient} {Greek} {spaCy} models for {Natural} {Language} {Processing} in {Python}}, copyright = {MIT}, shorttitle = {{greCy}}, url = {https://github.com/jmyerston/greCy}, abstract = {Ancient Greek language models for spaCy}, urldate = {2024-02-27}, author = {Myerston, Jacobo and López, Jose}, month = dec, year = {2023}, note = {original-date: 2022-09-18T23:13:41Z}, }
@article{sommerschield_machine_2023, title = {Machine {Learning} for {Ancient} {Languages}: {A} {Survey}}, issn = {0891-2017}, shorttitle = {Machine {Learning} for {Ancient} {Languages}}, url = {https://doi.org/10.1162/coli_a_00481}, doi = {10.1162/coli_a_00481}, abstract = {Ancient languages preserve the cultures and histories of the past. However, their study is fraught with difficulties, and experts must tackle a range of challenging text-based tasks, from deciphering lost languages to restoring damaged inscriptions, to determining the authorship of works of literature. Technological aids have long supported the study of ancient texts, but in recent years advances in artificial intelligence and machine learning have enabled analyses on a scale and in a detail that are reshaping the field of humanities, similarly to how microscopes and telescopes have contributed to the realm of science. This article aims to provide a comprehensive survey of published research using machine learning for the study of ancient texts written in any language, script, and medium, spanning over three and a half millennia of civilizations around the ancient world. To analyze the relevant literature, we introduce a taxonomy of tasks inspired by the steps involved in the study of ancient documents: digitization, restoration, attribution, linguistic analysis, textual criticism, translation, and decipherment. This work offers three major contributions: first, mapping the interdisciplinary field carved out by the synergy between the humanities and machine learning; second, highlighting how active collaboration between specialists from both fields is key to producing impactful and compelling scholarship; third, highlighting promising directions for future work in this field. 
Thus, this work promotes and supports the continued collaborative impetus between the humanities and machine learning.}, urldate = {2023-09-15}, journal = {Computational Linguistics}, author = {Sommerschield, Thea and Assael, Yannis and Pavlopoulos, John and Stefanak, Vanessa and Senior, Andrew and Dyer, Chris and Bodel, John and Prag, Jonathan and Androutsopoulos, Ion and Freitas, Nando de}, month = aug, year = {2023}, pages = {1--45}, }
@article{du_shortcut_2023, title = {Shortcut {Learning} of {Large} {Language} {Models} in {Natural} {Language} {Understanding}}, volume = {67}, issn = {0001-0782}, url = {https://dl.acm.org/doi/10.1145/3596490}, doi = {10.1145/3596490}, abstract = {Shortcuts often hinder the robustness of large language models.}, number = {1}, urldate = {2024-05-01}, journal = {Communications of the ACM}, author = {Du, Mengnan and He, Fengxiang and Zou, Na and Tao, Dacheng and Hu, Xia}, year = {2023}, pages = {110--120}, }
@inproceedings{yousef_classical_2023, title = {Classical {Philology} in the {Time} of {AI}: {Exploring} the {Potential} of {Parallel} {Corpora} in {Ancient} {Languages}}, shorttitle = {Classical {Philology} in the {Time} of {AI}}, url = {https://www.researchgate.net/profile/Chiara-Palladino/publication/373638720_Classical_Philology_in_the_Time_of_AI_Exploring_the_Potential_of_Parallel_Corpora_in_Ancient_Languages/links/64f49e0bfa851147de0fa850/Classical-Philology-in-the-Time-of-AI-Exploring-the-Potential-of-Parallel-Corpora-in-Ancient-Languages.pdf}, abstract = {This contribution presents an overview of Parallel Text Processing, particularly Translation Alignment, and illustrates the current status of this task in ancient languages. In the first part, we provide the fundamental principles of Parallel Texts and give an overview of their applications for the study of ancient texts. In the second part, we indicate how Parallel Texts can be leveraged to perform other NLP tasks, including automatic alignment, dynamic lexica induction, and Named Entity Recognition. In the conclusion, we emphasize current limitations and future work.}, author = {Yousef, Tariq and Palladino, Chiara and Shamsian, Farnoosh}, month = sep, year = {2023}, }
@misc{riemenschneider_exploring_2023, title = {Exploring {Large} {Language} {Models} for {Classical} {Philology}}, url = {http://arxiv.org/abs/2305.13698}, doi = {10.48550/arXiv.2305.13698}, abstract = {Recent advances in NLP have led to the creation of powerful language models for many languages including Ancient Greek and Latin. While prior work on Classical languages unanimously uses BERT, in this work we create four language models for Ancient Greek that vary along two dimensions to study their versatility for tasks of interest for Classical languages: we explore (i) encoder-only and encoder-decoder architectures using RoBERTa and T5 as strong model types, and create for each of them (ii) a monolingual Ancient Greek and a multilingual instance that includes Latin and English. We evaluate all models on morphological and syntactic tasks, including lemmatization, which demonstrates the added value of T5's decoding abilities. We further define two probing tasks to investigate the knowledge acquired by models pre-trained on Classical texts. Our experiments provide the first benchmarking analysis of existing models of Ancient Greek. Results show that our models provide significant improvements over the SoTA. The systematic analysis of model types can inform future research in designing language models for Classical languages, including the development of novel generative tasks. We make all our models available as community resources, along with a large curated pre-training corpus for Ancient Greek, to support the creation of a larger, comparable model zoo for Classical Philology. Our models and resources are available at https://github.com/Heidelberg-NLP/ancient-language-models.}, urldate = {2023-09-29}, publisher = {arXiv}, author = {Riemenschneider, Frederick and Frank, Anette}, month = may, year = {2023}, note = {arXiv:2305.13698 [cs]}, keywords = {Computer Science - Computation and Language, I.2.7}, }
@article{salden_didaktische_2023, title = {Didaktische und rechtliche {Perspektiven} auf {KI}-gestütztes {Schreiben} in der {Hochschulbildung}}, journal = {Zentrum für Wissenschaftsdidaktik der Ruhr-Universität Bochum}, author = {Salden, Peter and Leschke, Jonas}, year = {2023}, }
@article{hartman_quantitative_2023, title = {{QUANTITATIVE} {APPROACHES} {TO} {LATE} {ANTIQUE} {POETICS}: {ENUMERATION} {AND} {CONGERIES}}, issn = {1350346411}, journal = {A Late Antique Poetics?: The Jeweled Style Revisited}, author = {Hartman, Joshua and Levernier, Jacob}, year = {2023}, note = {Publisher: Bloomsbury Publishing}, pages = {75}, }
@inproceedings{warstadt_findings_2023, address = {Singapore}, title = {Findings of the {BabyLM} {Challenge}: {Sample}-{Efficient} {Pretraining} on {Developmentally} {Plausible} {Corpora}}, shorttitle = {Findings of the {BabyLM} {Challenge}}, url = {https://aclanthology.org/2023.conll-babylm.1}, doi = {10.18653/v1/2023.conll-babylm.1}, abstract = {Children can acquire language from less than 100 million words of input. Large language models are far less data-efficient: they typically require 3 or 4 orders of magnitude more data and still do not perform as well as humans on many evaluations. These intensive resource demands limit the ability of researchers to train new models and use existing models as developmentally plausible cognitive models. The BabyLM Challenge is a communal effort in which participants compete to optimize language model training on a fixed data budget. Submissions are compared on various evaluation tasks targeting grammatical ability, downstream task performance, and generalization. Participants can submit to up to three tracks with progressively looser data restrictions. From over 30 submissions, we extract concrete recommendations on how best to train data-efficient language models, and on where future efforts should (and perhaps should not) focus. The winning submissions using the LTG-BERT architecture (Samuel et al., 2023) outperformed models trained on trillions of words. Other submissions achieved strong results through training on shorter input sequences or training a student model on a pretrained teacher. 
Curriculum learning attempts, which accounted for a large number of submissions, were largely unsuccessful, though some showed modest improvements.}, language = {en}, urldate = {2024-01-16}, booktitle = {Proceedings of the {BabyLM} {Challenge} at the 27th {Conference} on {Computational} {Natural} {Language} {Learning}}, publisher = {Association for Computational Linguistics}, author = {Warstadt, Alex and Mueller, Aaron and Choshen, Leshem and Wilcox, Ethan and Zhuang, Chengxu and Ciro, Juan and Mosquera, Rafael and Paranjabe, Bhargavi and Williams, Adina and Linzen, Tal and Cotterell, Ryan}, year = {2023}, pages = {1--6}, }
@inproceedings{kostkan_odycy_2023, address = {Dubrovnik, Croatia}, title = {{OdyCy} – {A} general-purpose {NLP} pipeline for {Ancient} {Greek}}, booktitle = {Proceedings of the 7th {Joint} {SIGHUM} {Workshop} on {Computational} {Linguistics} for {Cultural} {Heritage}, {Social} {Sciences}, {Humanities} and {Literature}}, publisher = {Association for Computational Linguistics}, author = {Kostkan, Jan and Kardos, Márton and Palle Bliddal Mortensen, Jacob and Laigaard Nielbo, Kristofer}, year = {2023}, pages = {128--134}, }
@incollection{gamba_latin_2023, address = {Varna, Bulgaria}, title = {Latin {Morphology} through the {Centuries}: {Ensuring} {Consistency} for {Better} {Language} {Processing}.}, url = {https://ufal.mff.cuni.cz/biblio/attachments/2023-gamba-p3787387064232511302.pdf}, booktitle = {Proceedings of the {Ancient} {Language} {Processing} {Workshop} associated with the 14th {International} {Conference} on {Recent} {Advances} in {Natural} {Language} {Processing} {RANLP} 2023}, author = {Gamba, F. and Zeman, D.}, year = {2023}, }
@incollection{gamba_universalising_2023, address = {Washington, DC, USA}, series = {Proceedings of the {Sixth} {Workshop} on {Universal} {Dependencies} ({UDW}, {GURT}/{SyntaxFest} 2023)}, title = {Universalising {Latin} {Universal} {Dependencies}: a harmonisation of {Latin} treebanks in {UD}.}, url = {https://aclanthology.org/2023.udw-1.2/}, publisher = {Association for Computational Linguistics (ACL)}, author = {Gamba, F. and Zeman, D.}, year = {2023}, }
@article{grillo_meta-literature_2023, title = {Meta-{Literature} and {Mimesis} in the {Rhetorica} ad {Herennium}}, volume = {144}, number = {1}, journal = {American Journal of Philology}, author = {Grillo, Luca}, year = {2023}, pages = {41--72}, }
@misc{riemenschneider_graecia_2023, title = {Graecia capta ferum victorem cepit. {Detecting} {Latin} {Allusions} to {Ancient} {Greek} {Literature}}, url = {http://arxiv.org/abs/2308.12008}, doi = {10.48550/arXiv.2308.12008}, abstract = {Intertextual allusions hold a pivotal role in Classical Philology, with Latin authors frequently referencing Ancient Greek texts. Until now, the automatic identification of these intertextual references has been constrained to monolingual approaches, seeking parallels solely within Latin or Greek texts. In this study, we introduce SPhilBERTa, a trilingual Sentence-RoBERTa model tailored for Classical Philology, which excels at cross-lingual semantic comprehension and identification of identical sentences across Ancient Greek, Latin, and English. We generate new training data by automatically translating English texts into Ancient Greek. Further, we present a case study, demonstrating SPhilBERTa's capability to facilitate automated detection of intertextual parallels. Our models and resources are available at https://github.com/Heidelberg-NLP/ancient-language-models.}, urldate = {2023-09-29}, publisher = {arXiv}, author = {Riemenschneider, Frederick and Frank, Anette}, month = aug, year = {2023}, note = {arXiv:2308.12008 [cs]}, keywords = {Computer Science - Computation and Language, I.2.7}, }
@article{krahn_sentence_2023, title = {Sentence {Embedding} {Models} for {Ancient} {Greek} {Using} {Multilingual} {Knowledge} {Distillation}}, url = {https://arxiv.org/pdf/2308.13116.pdf}, journal = {arXiv preprint arXiv:2308.13116}, author = {Krahn, Kevin and Tate, Derrick and Lamicela, Andrew C}, year = {2023}, }
@article{van_der_lek_integrating_2023, title = {Integrating research infrastructures into teaching: {Recommendations} and best practices}, shorttitle = {Integrating research infrastructures into teaching}, url = {https://zenodo.org/record/8114407}, abstract = {The UPSKILLS needs revealed that linguistics and language-related degree programmes seldom include language data standards and research data repositories in their learning outcomes. A survey of lecturers from linguistics and language-related disciplines also exposed a number of challenges in using repositories for language data discovery, reuse and archiving. Against this backdrop, the present guide shows how teachers and trainers can leverage the CLARIN research infrastructure to help students enhance their data collection, processing and analysis, and archiving skills. By integrating research infrastructures into teaching, educators can bridge the gap between theoretical knowledge and practical aspects of linguistic research data management, equipping students with the necessary skills and competences to thrive in the evolving landscape of open science and data-driven research.}, urldate = {2023-10-02}, author = {van der Lek, Iulianna and Fišer, Darja and Samardzic, Tanja and Simonovic, Marko and Assimakopoulos, Stavros and Bernardini, Silvia and Milicevic Petrovic, Maja and Puskas, Genoveva}, month = aug, year = {2023}, note = {Publisher: Zenodo}, }
@misc{schork_kunstliche_2023, title = {Künstliche {Intelligenz} in der {Bildung}: {Drei} {Zukunftsszenarien} und fünf {Handlungsfelder}}, url = {https://ki-campus.org/sites/default/files/2023-04/2023-03_Diskussionspapier_KI_Bildung_Zukunftsszenarien_Handlungsfelder_KI-Campus.pdf}, abstract = {Die voranschreitende Digitalisierung und insbesondere der Einsatz von Künstlicher Intelligenz (KI) in der Bildung eröffnen neue Möglichkeiten des Lernens. Bildung kann stärker individuell sowie zeitlich und räumlich entgrenzt stattfinden. ChatGPT verdeutlicht, wie dynamisch die Entwicklungen im Bereich KI sind. Das in Zunahme begriffene Interesse am Thema KI allgemein sowie KI in der Bildung kommt dem erforderlichen Austausch zwischen zentralen Akteur:innen zugute. Dieses Diskussionspapier präsentiert drei Zukunftsszenarien für den Einsatz von KI in der institutionellen Bildung: Hochschule, Weiterbildung und Schule. Im Fokus stehen die übergeordneten Fragestellungen nach Voraussetzungen und Gelingensbedingungen einer erfolgreichen Anwendung sowie nach gemeinsamen Handlungsfeldern für zentrale Stakeholder. Fünf Handlungsfelder für den Einsatz von KI in institutioneller Bildung sind auf Grundlage der skizzierten Zukunftsszenarien hervorzuheben: (1) Interdisziplinäre Zusammenarbeit, (2) Qualifizierungsangebote und Kompetenzentwicklung, (3) Digitale Infrastruktur und Personal, (4) Ethik und Datensouveränität sowie (5) Interoperabilität von Daten in Bildungskontexten. Ziel des vorliegenden Diskussionspapiers ist es, den kooperativen Austausch zwischen relevanten Stakeholdern anzuregen.}, author = {Schork, Sabrina and Schleiss, Johannes and Mah, Dana-Kristin and Böhme, Katrin and Fischer, David and Mesenhöller, Janne and Paaßen, Benjamin and Schrumpf, Johannes}, year = {2023}, }
@article{motoki_more_2023, title = {More human than human: measuring {ChatGPT} political bias}, issn = {1573-7101}, shorttitle = {More human than human}, url = {https://doi.org/10.1007/s11127-023-01097-2}, doi = {10.1007/s11127-023-01097-2}, abstract = {We investigate the political bias of a large language model (LLM), ChatGPT, which has become popular for retrieving factual information and generating content. Although ChatGPT assures that it is impartial, the literature suggests that LLMs exhibit bias involving race, gender, religion, and political orientation. Political bias in LLMs can have adverse political and electoral consequences similar to bias from traditional and social media. Moreover, political bias can be harder to detect and eradicate than gender or racial bias. We propose a novel empirical design to infer whether ChatGPT has political biases by requesting it to impersonate someone from a given side of the political spectrum and comparing these answers with its default. We also propose dose-response, placebo, and profession-politics alignment robustness tests. To reduce concerns about the randomness of the generated text, we collect answers to the same questions 100 times, with question order randomized on each round. We find robust evidence that ChatGPT presents a significant and systematic political bias toward the Democrats in the US, Lula in Brazil, and the Labour Party in the UK. These results translate into real concerns that ChatGPT, and LLMs in general, can extend or even amplify the existing challenges involving political processes posed by the Internet and social media. Our findings have important implications for policymakers, media, politics, and academia stakeholders.}, language = {en}, urldate = {2023-09-15}, journal = {Public Choice}, author = {Motoki, Fabio and Neto, Valdemar Pinho and Rodrigues, Victor}, month = aug, year = {2023}, keywords = {Bias, C10, C89, ChatGPT, D83, L86, Large language models, Political bias, Z00}, }
@misc{gallegos_bias_2023, title = {Bias and {Fairness} in {Large} {Language} {Models}: {A} {Survey}}, shorttitle = {Bias and {Fairness} in {Large} {Language} {Models}}, url = {http://arxiv.org/abs/2309.00770}, doi = {10.48550/arXiv.2309.00770}, abstract = {Rapid advancements of large language models (LLMs) have enabled the processing, understanding, and generation of human-like text, with increasing integration into systems that touch our social sphere. Despite this success, these models can learn, perpetuate, and amplify harmful social biases. In this paper, we present a comprehensive survey of bias evaluation and mitigation techniques for LLMs. We first consolidate, formalize, and expand notions of social bias and fairness in natural language processing, defining distinct facets of harm and introducing several desiderata to operationalize fairness for LLMs. We then unify the literature by proposing three intuitive taxonomies, two for bias evaluation, namely metrics and datasets, and one for mitigation. Our first taxonomy of metrics for bias evaluation disambiguates the relationship between metrics and evaluation datasets, and organizes metrics by the different levels at which they operate in a model: embeddings, probabilities, and generated text. Our second taxonomy of datasets for bias evaluation categorizes datasets by their structure as counterfactual inputs or prompts, and identifies the targeted harms and social groups; we also release a consolidation of publicly-available datasets for improved access. Our third taxonomy of techniques for bias mitigation classifies methods by their intervention during pre-processing, in-training, intra-processing, and post-processing, with granular subcategories that elucidate research trends. Finally, we identify open problems and challenges for future work. 
Synthesizing a wide range of recent research, we aim to provide a clear guide of the existing literature that empowers researchers and practitioners to better understand and prevent the propagation of bias in LLMs.}, urldate = {2023-09-15}, publisher = {arXiv}, author = {Gallegos, Isabel O. and Rossi, Ryan A. and Barrow, Joe and Tanjim, Md Mehrab and Kim, Sungchul and Dernoncourt, Franck and Yu, Tong and Zhang, Ruiyi and Ahmed, Nesreen K.}, month = sep, year = {2023}, note = {arXiv:2309.00770 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Computers and Society, Computer Science - Machine Learning}, }
@incollection{bewersdorff_tum-digillab_2023, address = {Wiesbaden}, series = {Edition {Fachdidaktiken}}, title = {Das {TUM}-{DigiLLab}: {Lehr}-{Lernraum} sowie {Forschungs}- und {Entwicklungsumgebung} zur {Förderung} digitaler {Kompetenzen}}, isbn = {978-3-658-40109-2}, shorttitle = {Das {TUM}-{DigiLLab}}, url = {https://doi.org/10.1007/978-3-658-40109-2_10}, abstract = {Das Digitale Lehr-Lern-Labor der Technischen Universität München (TUM-DigiLLab) soll als Ort der Entwicklung, Durchführung und Beforschung von Lehr-Lernkonzepten in authentischen Anwendungskontexten helfen die Lücke zwischen theoretischer Wissensvermittlung an der Universität und der Wissensanwendung in lebensweltlichen oder professionellen Zusammenhängen der Praxis zu schließen. Einen didaktischen Schwerpunkt bildet die Entwicklung innovativer Lehr-Lernkonzepte im Themenfeld der Künstlichen Intelligenz.}, language = {de}, urldate = {2023-09-15}, booktitle = {Lehr-{Lern}-{Labore} und {Digitalisierung}}, publisher = {Springer Fachmedien}, author = {Bewersdorff, Arne and Nerdel, Claudia}, editor = {Meier, Monique and Greefrath, Gilbert and Hammann, Marcus and Wodzinski, Rita and Ziepprecht, Kathrin}, year = {2023}, doi = {10.1007/978-3-658-40109-2_10}, keywords = {Augmented Reality, Digitalisierung, Künstliche Intelligenz, Lehr-Lern-Labore, Technologiegestützte Kooperation}, pages = {137--141}, }
@article{bewersdorff_myths_2023, title = {Myths, mis- and preconceptions of artificial intelligence: {A} review of the literature}, volume = {4}, issn = {2666-920X}, shorttitle = {Myths, mis- and preconceptions of artificial intelligence}, url = {https://www.sciencedirect.com/science/article/pii/S2666920X2300022X}, doi = {10.1016/j.caeai.2023.100143}, abstract = {Artificial Intelligence (AI) is prevalent in nearly every aspect of our lives. However, recent studies have found a significant amount of confusion and misunderstanding surrounding AI. To develop effective educational programs in the field of AI, it is vital to examine and understand learners' pre- and misconceptions as well as myths about AI. This study examined a corpus of 591 studies. 25 relevant studies were identified by applying the following eligibility criteria: English-written original empirical research on education and AI and reporting AI conceptions in a formal learning context. The review found studies from six continents, with the majority conducted in Europe and North America. The studies predominantly focus on the school and university levels. Findings reveal a range of preconceptions, misconceptions, and myths about AI, such as: Learners often have limited understanding of AI on a technical level. They tend to attribute human-like characteristics or attributes to AI systems and may have narrow views of AI's scope, capabilities, and limitations. The review also shows that learners often have binary and unspecific views about the threats, dangers, and benefits of AI. Effective educational programs are key to empower learners' understanding of AI, thus helping them make informed decisions about the integration of AI in our society, rather than being swayed by misinformation and unnecessary fear. 
This review may help inform the development of more effective teaching and outreach strategies in AI education.}, urldate = {2023-09-15}, journal = {Computers and Education: Artificial Intelligence}, author = {Bewersdorff, Arne and Zhai, Xiaoming and Roberts, Jessica and Nerdel, Claudia}, month = jan, year = {2023}, keywords = {Artificial intelligence, Misconceptions, Preconceptions, Review}, pages = {100143}, }
@book{hose_formen_2023, address = {Stuttgart}, title = {Formen und {Funktionen} griechisch-römischer {Literatur} : {Aufsätze} zur {Literaturgeschichte} und {Literaturgeschichtsschreibung}}, isbn = {978-3-515-13411-8}, shorttitle = {Formen und {Funktionen} griechisch-römischer {Literatur}}, url = {https://d-nb.info/1273972600/04}, publisher = {Franz Steiner Verlag}, author = {Hose, Martin}, editor = {Peri, Annamaria and Thum, Tobias}, year = {2023}, }
@book{baker_chatgpt_2023, title = {{ChatGPT} für {Dummies}}, isbn = {978-3-527-84473-9}, abstract = {Profitieren auch Sie von den wunderbaren Fähigkeiten von ChatGPT. Pam Baker erklärt Ihnen, wie ChatGPT funktioniert und wie Sie den Chatbot gewinnbringend einsetzen - sei es bei der Texterstellung für Werbezwecke, der Kundenbetreuung auf einer Webseite oder für die Beantwortung all jener Fragen, auf die Sie bisher keine Antwort gefunden haben. Sie lernen die Stärken und Schwächen des Tools kennen. So können Sie besser einschätzen, wo es Ihnen nutzt und wo Sie besser weiter arbeiten wie bisher. Erschließen Sie das Potenzial von ChatGPT!}, language = {de}, publisher = {John Wiley \& Sons}, author = {Baker, Pam}, month = aug, year = {2023}, note = {Google-Books-ID: dEbREAAAQBAJ}, keywords = {Computers / Artificial Intelligence / General, Computers / Computer Science, Computers / Information Technology}, }
@misc{zhao_survey_2023, title = {A {Survey} of {Large} {Language} {Models}}, url = {http://arxiv.org/abs/2303.18223}, abstract = {Ever since the Turing Test was proposed in the 1950s, humans have explored the mastering of language intelligence by machine. Language is essentially a complex, intricate system of human expressions governed by grammatical rules. It poses a significant challenge to develop capable artificial intelligence (AI) algorithms for comprehending and grasping a language. As a major approach, language modeling has been widely studied for language understanding and generation in the past two decades, evolving from statistical language models to neural language models. Recently, pre-trained language models (PLMs) have been proposed by pretraining Transformer models over large-scale corpora, showing strong capabilities in solving various natural language processing (NLP) tasks. Since the researchers have found that model scaling can lead to an improved model capacity, they further investigate the scaling effect by increasing the parameter scale to an even larger size. Interestingly, when the parameter scale exceeds a certain level, these enlarged language models not only achieve a significant performance improvement, but also exhibit some special abilities (e.g., incontext learning) that are not present in small-scale language models (e.g., BERT). To discriminate the language models in different parameter scales, the research community has coined the term large language models (LLM) for the PLMs of significant size (e.g., containing tens or hundreds of billions of parameters). Recently, the research on LLMs has been largely advanced by both academia and industry, and a remarkable progress is the launch of ChatGPT (a powerful AI chatbot developed based on LLMs), which has attracted widespread attention from society. 
The technical evolution of LLMs has been making an important impact on the entire AI community, which would revolutionize the way how we develop and use AI algorithms. Considering this rapid technical progress, in this survey, we review the recent advances of LLMs by introducing the background, key findings, and mainstream techniques. In particular, we focus on four major aspects of LLMs, namely pre-training, adaptation tuning, utilization, and capacity evaluation. Furthermore, we also summarize the available resources for developing LLMs and discuss the remaining issues for future directions. This survey provides an up-to-date review of the literature on LLMs, which can be a useful resource for both researchers and engineers.}, language = {en}, urldate = {2023-09-14}, publisher = {arXiv}, author = {Zhao, Wayne Xin and Zhou, Kun and Li, Junyi and Tang, Tianyi and Wang, Xiaolei and Hou, Yupeng and Min, Yingqian and Zhang, Beichen and Zhang, Junjie and Dong, Zican and Du, Yifan and Yang, Chen and Chen, Yushuo and Chen, Zhipeng and Jiang, Jinhao and Ren, Ruiyang and Li, Yifan and Tang, Xinyu and Liu, Zikang and Liu, Peiyu and Nie, Jian-Yun and Wen, Ji-Rong}, month = sep, year = {2023}, note = {arXiv:2303.18223 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language}, }
@article{linka_pain_2023, title = {Pain in {Classical} {Greek} {Texts}}, copyright = {Copyright (c) 2023}, issn = {2364-7957}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/93792}, doi = {10.11588/dco.2023.9.93792}, abstract = {Texte aus der klassischen griechischen Periode spielen eine entscheidende Rolle in der historischen Entwicklung der westlichen Wissenschaft und Philosophie. Das Konzept des Schmerzes ist vor allem für zwei Bereiche des menschlichen Wissens, nämlich Medizin und Ethik, von zentraler Bedeutung. Obwohl der Begriff des Schmerzes für beide Bereiche wichtig ist, werden sie in der Wissenschaft meist getrennt voneinander untersucht. Wir betrachten sie gemeinsam, im Kontext der gesamten erhaltenen klassischen griechischen Literatur. Dies wird durch unseren methodischen Ansatz ermöglicht, der traditionelle Interpretationsansätze mit computergestützten Textanalysemethoden kombiniert und so die Untersuchung einer großen Menge von Textdaten ermöglicht. Wenn wir den Kontext der Verwendung einzelner Wörter, die Schmerz bezeichnen, in den Texten verschiedener Gattungen oder Themen betrachten, können wir relativ stabile semantische Cluster identifizieren, auf die auf Schmerz hindeuten, wie etwa Pathologien, Emotionen oder Moral. Auf diese Weise sind wir in der Lage, die Rolle bestimmter Schmerzwörter, ihre Bedeutung und ihre wechselseitigen Beziehungen in klassischen griechischen Texten zu erfassen. Unser Ansatz ermöglicht es uns auch, die Rolle verschiedener textueller Subkorpora (philosophisch, medizinisch) für die Art und Weise zu erkennen, wie Schmerz in klassischen griechischen Texten aufgefasst wurde.}, language = {en}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Linka, Vojtěch and Kaše, Vojtěch}, month = apr, year = {2023}, keywords = {Semantische Analyse}, pages = {1--14}, }
@misc{burns_latincy_2023, title = {{LatinCy}: {Synthetic} {Trained} {Pipelines} for {Latin} {NLP}}, shorttitle = {{LatinCy}}, url = {http://arxiv.org/abs/2305.04365}, doi = {10.48550/arXiv.2305.04365}, abstract = {This paper introduces LatinCy, a set of trained general purpose Latin-language "core" pipelines for use with the spaCy natural language processing framework. The models are trained on a large amount of available Latin data, including all five of the Latin Universal Dependency treebanks, which have been preprocessed to be compatible with each other. The result is a set of general models for Latin with good performance on a number of natural language processing tasks (e.g. the top-performing model yields POS tagging, 97.41\% accuracy; lemmatization, 94.66\% accuracy; morphological tagging 92.76\% accuracy). The paper describes the model training, including its training data and parameterization, and presents the advantages to Latin-language researchers of having a spaCy model available for NLP work.}, urldate = {2023-07-16}, publisher = {arXiv}, author = {Burns, Patrick J.}, month = may, year = {2023}, note = {arXiv:2305.04365 [cs] version: 1}, keywords = {Computer Science - Computation and Language}, }
@inproceedings{sprugnoli_sentiment_2022, title = {Sentiment {Analysis} of {Latin} {Poetry}: {First} {Experiments} on the {Odes} of {Horace}}, url = {https://books.openedition.org/aaccademia/10854}, booktitle = {Proceedings of the {Eighth} {Italian} {Conference} on {Computational} {Linguistics} {CliC}-{It} 2021}, author = {Sprugnoli, Rachele and Mambrini, Francesco and Passarotti, Marco and Moretti, Giovanni}, year = {2022}, }
@inproceedings{yamshchikov_bert_2022, title = {{BERT} in {Plutarch}'s {Shadows}}, url = {https://aclanthology.org/2022.emnlp-main.407.pdf}, doi = {10.18653/v1/2022.emnlp-main.407}, booktitle = {Proceedings of the 2022 {Conference} on {Empirical} {Methods} in {Natural} {Language} {Processing}}, publisher = {Association for Computational Linguistics}, author = {Yamshchikov, Ivan P and Tikhonov, Alexey and Pantis, Yorgos and Schubert, Charlotte and Jost, Jürgen}, year = {2022}, pages = {6071--6080}, }
@article{assael_restoring_2022, title = {Restoring and attributing ancient texts using deep neural networks}, volume = {603}, copyright = {2022 The Author(s)}, issn = {1476-4687}, url = {https://www.nature.com/articles/s41586-022-04448-z}, doi = {10.1038/s41586-022-04448-z}, abstract = {Ancient history relies on disciplines such as epigraphy—the study of inscribed texts known as inscriptions—for evidence of the thought, language, society and history of past civilizations1. However, over the centuries, many inscriptions have been damaged to the point of illegibility, transported far from their original location and their date of writing is steeped in uncertainty. Here we present Ithaca, a deep neural network for the textual restoration, geographical attribution and chronological attribution of ancient Greek inscriptions. Ithaca is designed to assist and expand the historian’s workflow. The architecture of Ithaca focuses on collaboration, decision support and interpretability. While Ithaca alone achieves 62\% accuracy when restoring damaged texts, the use of Ithaca by historians improved their accuracy from 25\% to 72\%, confirming the synergistic effect of this research tool. Ithaca can attribute inscriptions to their original location with an accuracy of 71\% and can date them to less than 30 years of their ground-truth ranges, redating key texts of Classical Athens and contributing to topical debates in ancient history. 
This research shows how models such as Ithaca can unlock the cooperative potential between artificial intelligence and historians, transformationally impacting the way that we study and write about one of the most important periods in human history.}, language = {en}, number = {7900}, urldate = {2023-04-26}, journal = {Nature}, author = {Assael, Yannis and Sommerschield, Thea and Shillingford, Brendan and Bordbar, Mahyar and Pavlopoulos, John and Chatzipanagiotou, Marita and Androutsopoulos, Ion and Prag, Jonathan and de Freitas, Nando}, month = mar, year = {2022}, note = {Number: 7900 Publisher: Nature Publishing Group}, keywords = {Archaeology, Computer science, History}, pages = {280--283}, }
@article{mcgillivray_new_2022, title = {A {New} {Corpus} {Annotation} {Framework} for {Latin} {Diachronic} {Lexical} {Semantics}}, volume = {21}, doi = {10.1515/joll-2022-2007}, url = {https://doi.org/10.1515/joll-2022-2007}, number = {1}, journal = {Journal of Latin linguistics}, author = {McGillivray, Barbara and Kondakova, Daria and Burman, Annie and Dell’Oro, Francesca and Bermúdez Sabel, Helena and Marongiu, Paola and Cruz, Manuel Márquez}, year = {2022}, keywords = {Latin lexical semantics, LatinISE corpus, annotation, semantic change}, pages = {47--105}, }
@inproceedings{pavlopoulos_sentiment_2022, title = {Sentiment {Analysis} of {Homeric} {Text}: {The} 1st {Book} of {Iliad}}, url = {https://aclanthology.org/2022.lrec-1.765.pdf}, author = {Pavlopoulos, John and Xenos, Alexandros and Picca, Davide}, year = {2022}, pages = {7071--7077}, }
@incollection{de_graaf_agile_2022, address = {Marseille, France}, title = {{AGILe}: {The} {First} {Lemmatizer} for {Ancient} {Greek} {Inscriptions}}, booktitle = {Proceedings of the 13th {Conference} on {Language} {Resources} and {Evaluation} ({LREC} 2022)}, author = {de Graaf, E. and Stopponi, S. and Bos, J. and Peels-Matthey, S. and Nissim, M.}, year = {2022}, pages = {5334--5344}, }
@article{prieto_espinosa_corpus_2022, title = {El {Corpus} {Documentale} {Latinum} {Hispaniarum} ({CODOLHisp}), una plataforma digital d’accés conjunt per a l’estudi del llatí medieval hispànic}, url = {https://raco.cat/index.php/LlenguaLiteratura/article/view/399457}, journal = {Llengua i literatura : revista anual de la Societat Catalana de Llengua i Literatura}, author = {Prieto Espinosa, Carlos}, year = {2022}, pages = {204--207}, }
@article{forstall_towards_2022, title = {Towards a {Linked} {Open} {Data} {Resource} for {Direct} {Speech} {Acts} in {Greek} and {Latin} {Epic}}, volume = {37}, doi = {10.1093/llc/fqac006}, number = {4}, journal = {Digital scholarship in the humanities}, author = {Forstall, Christopher W. and Finkmann, Simone and Verhelst, Berenice}, year = {2022}, pages = {972--981}, }
@book{regnault_annotation_2022, address = {Paris, France}, title = {Annotation et analyse syntaxique de corpus hétérogènes : le cas du français médiéval}, url = {https://hal-lirmm.ccsd.cnrs.fr/AO-LINGUISTIQUE/tel-04069848v1}, publisher = {Univ. de la Sorbonne Nouvelle (Paris III)}, author = {Regnault, Mathilde}, year = {2022}, }
@article{kenty_irony_2022, title = {Irony and {Figured} {Language} in {Cicero}’s {Letter} to {Lucceius}}, volume = {118}, language = {English}, number = {1}, journal = {Classical Journal}, author = {Kenty, Joanna}, year = {2022}, pages = {50--89}, }
@inproceedings{sprugnoli_overview_2022, address = {Marseille, France}, title = {Overview of the {EvaLatin} 2022 {Evaluation} {Campaign}}, url = {https://aclanthology.org/2022.lt4hala-1.29}, abstract = {This paper describes the organization and the results of the second edition of EvaLatin, the campaign for the evaluation of Natural Language Processing tools for Latin. The three shared tasks proposed in EvaLatin 2022, i.e., Lemmatization, Part-of-Speech Tagging and Features Identification, are aimed to foster research in the field of language technologies for Classical languages. The shared dataset consists of texts mainly taken from the LASLA corpus. More specifically, the training set includes only prose texts of the Classical period, whereas the test set is organized in three sub-tasks: a Classical sub-task on a prose text of an author not included in the training data, a Cross-genre sub-task on poetic and scientific texts, and a Cross-time sub-task on a text of the 15th century. The results obtained by the participants for each task and sub-task are presented and discussed.}, urldate = {2023-10-06}, booktitle = {Proceedings of the {Second} {Workshop} on {Language} {Technologies} for {Historical} and {Ancient} {Languages}}, publisher = {European Language Resources Association}, author = {Sprugnoli, Rachele and Passarotti, Marco and Cecchini, Flavio Massimiliano and Fantoli, Margherita and Moretti, Giovanni}, month = jun, year = {2022}, pages = {183--188}, }
@inproceedings{passarotti_issues_2022, title = {Issues in {Building} the {LiLa} {Knowledge} {Base} of {Interoperable} {Linguistic} {Resources} for {Latin}}, url = {https://zenodo.org/doi/10.5281/zenodo.7263412}, author = {Passarotti, Marco and Mambrini, Francesco}, year = {2022}, }
@article{engelhardt_how_2022, title = {How to be {FAIR} with your data}, copyright = {https://creativecommons.org/licenses/by/4.0/}, url = {https://www.univerlag.uni-goettingen.de/handle/3/isbn-978-3-86395-539-7}, doi = {10.17875/gup2022-1915}, abstract = {Softcover, 17x24}, language = {eng}, urldate = {2023-10-02}, author = {Engelhardt, Claudia and Barthauer, Raisa and Biernacka, Katarzyna and Coffey, Aoife and Cornet, Ronald and Danciu, Alina and Demchenko, Yuri and Downes, Stephen and Erdmann, Christopher and Garbuglia, Federica and Germer, Kerstin and Helbig, Kerstin and Hellström, Margareta and Hettne, Kristina and Hibbert, Dawn and Jetten, Mijke and Karimova, Yulia and Hansen, Karsten Kryger and Kuusniemi, Mari Elisa and Letizia, Viviana and McCutcheon, Valerie and McGillivray, Barbara and Ostrop, Jenny and Petersen, Britta and Petrus, Ana and Reichmann, Stefan and Rettberg, Najla and Reverté, Carmen and Rochlin, Nick and Saenen, Bregt and Schmidt, Birgit and Scholten, Jolien and Shanahan, Hugh and Straube, Armin and Eynden, Veerle Van den and Vandendorpe, Justine and Venkataram, Shanmugasundaram and Vieira, André and Wiljes, Cord and Wuttke, Ulrike and Yeomans, Joanne and Zhou, Biru}, year = {2022}, note = {Accepted: 2022-05-13T08:33:14Z Artwork Medium: Print Interview Medium: Print}, }
@misc{wienrich_ai_2022, title = {{AI} {Literacy}: {Kompetenzdimensionen} und {Einflussfaktoren} im {Kontext} von {Arbeit}}, url = {https://www.denkfabrik-bmas.de/fileadmin/Downloads/Publikationen/AI_Literacy_Kompetenzdimensionen_und_Einflussfaktoren_im_Kontext_von_Arbeit.pdf}, author = {Wienrich, Carolin and Carolus, Astrid and Augustin, Yannik and Markus, André}, year = {2022}, }
@incollection{egger_natural_2022, address = {Cham}, series = {Tourism on the {Verge}}, title = {Natural {Language} {Processing} ({NLP}): {An} {Introduction}}, isbn = {978-3-030-88389-8}, shorttitle = {Natural {Language} {Processing} ({NLP})}, url = {https://doi.org/10.1007/978-3-030-88389-8_15}, abstract = {With the increase in internet usage, the amount of available textual data has also continued to increase rapidly. In addition, the development of stronger computers has enabled the processing of data to become much easier. The tourism field has a strong potential to utilize such data available on the internet; yet, on the other hand, a high proportion of available data is unlabelled and unprocessed. In order to use them effectively, new methods and new approaches are needed. In this regard, the area of Natural Language Processing (NLP) helps researchers to utilize textual data and develop an understanding of text analysis. By using machine learning approaches, text mining potential can expand enormously, leading to deeper insights, a better understanding of social phenomena, and, thus, also a better basis for decision-making. As such, this chapter will provide the reader with the basics of NLP as well as present the text pre-processing procedure in detail.}, language = {en}, urldate = {2023-09-14}, booktitle = {Applied {Data} {Science} in {Tourism}: {Interdisciplinary} {Approaches}, {Methodologies}, and {Applications}}, publisher = {Springer International Publishing}, author = {Egger, Roman and Gokce, Enes}, editor = {Egger, Roman}, year = {2022}, doi = {10.1007/978-3-030-88389-8_15}, keywords = {Feature extraction, NER, POS, Pre-processing, Text cleaning}, pages = {307--334}, }
@inproceedings{peverelli_process_2022, address = {Antwerp}, title = {The {Process} of {Imitatio} {Through} {Stylometric} {Analysis}: the {Case} of {Terence}’s {Eunuchus}}, abstract = {The Early Modern Era is at the forefront of a widespread enthusiasm for Latin works: texts from classical antiquity are given new life, widely re-printed, studied and even repeatedly staged, in the case of dramas, throughout Europe. Also, new Latin comedies are again written in quantities never seen before (at least 10,000 works published 1500 to 1800 are known). The authors themselves, within the game of literary imitation (the process of imitatio), start to mimic the style of ancient authors, and Terence’s dramas in particular were considered the prime sources of reuse for many decades. Via a case study ”the reception of Terence’s Eunuchus in Early Modern literature”, we take a deep dive into the mechanisms of literary imitation. Our analysis is based on four comedy corpora in Latin, Italian, French and English, spanning roughly 3 centuries (1400-1700). To assess the problem of language shift and multi-language intercorpora analysis, we base our experiments on translations of the Eunuchus, one for each sub-corpus. Through the use of tools drawn from the field of Stylometry, we address the topic of text reuse and textual similarities between Terence’s text and Early-Modern corpora to get a better grasp on the internal fluctuations of the imitation game between Early Modern and Classical authors.}, language = {en}, author = {Peverelli, Andrea and van Erp, Marieke and Bloemendal, Jan}, year = {2022}, pages = {337--354}, }
@misc{nagy_stylometric_2022, title = {Some {Stylometric} {Remarks} on {Ovid}'s {Heroides} and the {Epistula} {Sapphus}}, url = {http://arxiv.org/abs/2202.11864}, doi = {10.48550/arXiv.2202.11864}, abstract = {This article aims to contribute to two well-worn areas of debate in classical Latin philology, relating to Ovid's Heroides. The first is the question of the authenticity (and, to a lesser extent the correct position) of the letter placed fifteenth by almost every editor -- the so-called Epistula Sapphus (henceforth ES). The secondary question, although perhaps now less fervently debated, is the authenticity of the 'Double Heroides', placed by those who accept them as letters 16-21. I employ a variety of methods drawn from the domain of computational stylometry to consider the poetics and the lexico-grammatical features of these elegiac poems in the broader context of a corpus of 'shorter' (from 20 to 546 lines) elegiac works from five authors (266 poems in all) comprising more or less all of the non-fragmentary classical corpus. Based on a variety of techniques, every measure gives clear indication that the poetic style of the Heroides is Ovidian, but distinctive; they can be accurately isolated from Ovid more broadly. The Single and Double Heroides split into two clear groups, with the ES grouped consistently with the single letters. Furthermore, by comparing the style of the letters with the 'early' (although there are complications in this label) works of the Amores and the late works of the Ex Ponto, the evidence supports sequential composition -- meaning that the ES is correctly placed -- and, further, supports the growing consensus that the double letters were composed significantly later, in exile.}, urldate = {2023-08-26}, publisher = {arXiv}, author = {Nagy, Ben}, month = feb, year = {2022}, note = {arXiv:2202.11864 [cs]}, keywords = {Computer Science - Computation and Language}, }
@article{nagy_rhyme_2022, title = {Rhyme in classical {Latin} poetry: {Stylistic} or stochastic?}, volume = {37}, issn = {2055-7671}, shorttitle = {Rhyme in classical {Latin} poetry}, url = {https://doi.org/10.1093/llc/fqab105}, doi = {10.1093/llc/fqab105}, abstract = {This study offers the first broad quantitative analysis of the use of rhyme in classical Latin hexameter and elegiac verse. The data and tools developed for the analysis are released under a permissive open source license. These include software to create an accurate phonetic transcription of Latin verse from the Musisque Deoque corpus; a system for scoring rhyme via phonetic similarity; and a system for generating large amounts of metrically correct, stochastic Latin verse (useful for analysis baselines). Further to this, some initial analysis is performed: first via descriptive statistics and then with two unsupervised multivariate analyses using dimension reduction methods. The study examines nineteen works by twelve authors, comprising about 96,000 lines. First and foremost, the results suggest that rhyme was consciously used by classical authors, but to different extents and in different ways. There is a solid and detectable stylistic separation between the use of rhyme in elegy and epic, and possibly also between satire and the rest. Within genres, authors can be stylistically separated with a small set of features. On the negative side, it appears that the stylistic signal from rhyme is fairly faint, and so forensic analysis (e.g. for authorship attribution) is not presently recommended on texts that are shorter than several thousand lines.}, number = {4}, urldate = {2023-08-26}, journal = {Digital Scholarship in the Humanities}, author = {Nagy, Ben}, month = dec, year = {2022}, pages = {1097--1118}, }
@article{chastang_named_2021, title = {A {Named} {Entity} {Recognition} {Model} for {Medieval} {Latin} {Charters}}, volume = {15}, number = {4}, journal = {Digital Humanities Quarterly}, author = {Chastang, Pierre and Torres Aguilar, Sergio Octavio and Tannier, Xavier}, year = {2021}, }
@article{ehrmann_named_2021, title = {Named {Entity} {Recognition} and {Classification} on {Historical} {Documents}: {A} {Survey}}, volume = {56}, issn = {0360-0300, 1557-7341}, shorttitle = {Named {Entity} {Recognition} and {Classification} on {Historical} {Documents}}, url = {http://arxiv.org/abs/2109.11406}, doi = {10.1145/3604931}, abstract = {After decades of massive digitisation, an unprecedented amount of historical documents is available in digital format, along with their machine-readable texts. While this represents a major step forward with respect to preservation and accessibility, it also opens up new opportunities in terms of content mining and the next fundamental challenge is to develop appropriate technologies to efficiently search, retrieve and explore information from this 'big data of the past'. Among semantic indexing opportunities, the recognition and classification of named entities are in great demand among humanities scholars. Yet, named entity recognition (NER) systems are heavily challenged with diverse, historical and noisy inputs. In this survey, we present the array of challenges posed by historical documents to NER, inventory existing resources, describe the main approaches deployed so far, and identify key priorities for future developments.}, number = {2}, urldate = {2025-01-01}, journal = {ACM Computing Surveys}, author = {Ehrmann, Maud and Hamdi, Ahmed and Pontes, Elvys Linhares and Romanello, Matteo and Doucet, Antoine}, month = sep, year = {2021}, note = {arXiv:2109.11406 [cs]}, keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning}, pages = {1--47}, }
@inproceedings{gehrmann_gem_2021, address = {Online}, title = {The {GEM} {Benchmark}: {Natural} {Language} {Generation}, its {Evaluation} and {Metrics}}, shorttitle = {The {GEM} {Benchmark}}, url = {https://aclanthology.org/2021.gem-1.10/}, doi = {10.18653/v1/2021.gem-1.10}, abstract = {We introduce GEM, a living benchmark for natural language Generation (NLG), its Evaluation, and Metrics. Measuring progress in NLG relies on a constantly evolving ecosystem of automated metrics, datasets, and human evaluation standards. Due to this moving target, new models often still evaluate on divergent anglo-centric corpora with well-established, but flawed, metrics. This disconnect makes it challenging to identify the limitations of current models and opportunities for progress. Addressing this limitation, GEM provides an environment in which models can easily be applied to a wide set of tasks and in which evaluation strategies can be tested. Regular updates to the benchmark will help NLG research become more multilingual and evolve the challenge alongside models. 
This paper serves as the description of the data for the 2021 shared task at the associated GEM Workshop.}, urldate = {2025-01-15}, booktitle = {Proceedings of the 1st {Workshop} on {Natural} {Language} {Generation}, {Evaluation}, and {Metrics} ({GEM} 2021)}, publisher = {Association for Computational Linguistics}, author = {Gehrmann, Sebastian and Adewumi, Tosin and Aggarwal, Karmanya and Ammanamanchi, Pawan Sasanka and Aremu, Anuoluwapo and Bosselut, Antoine and Chandu, Khyathi Raghavi and Clinciu, Miruna-Adriana and Das, Dipanjan and Dhole, Kaustubh and Du, Wanyu and Durmus, Esin and Dušek, Ondřej and Emezue, Chris Chinenye and Gangal, Varun and Garbacea, Cristina and Hashimoto, Tatsunori and Hou, Yufang and Jernite, Yacine and Jhamtani, Harsh and Ji, Yangfeng and Jolly, Shailza and Kale, Mihir and Kumar, Dhruv and Ladhak, Faisal and Madaan, Aman and Maddela, Mounica and Mahajan, Khyati and Mahamood, Saad and Majumder, Bodhisattwa Prasad and Martins, Pedro Henrique and McMillan-Major, Angelina and Mille, Simon and van Miltenburg, Emiel and Nadeem, Moin and Narayan, Shashi and Nikolaev, Vitaly and Niyongabo Rubungo, Andre and Osei, Salomey and Parikh, Ankur and Perez-Beltrachini, Laura and Rao, Niranjan Ramesh and Raunak, Vikas and Rodriguez, Juan Diego and Santhanam, Sashank and Sedoc, João and Sellam, Thibault and Shaikh, Samira and Shimorina, Anastasia and Sobrevilla Cabezudo, Marco Antonio and Strobelt, Hendrik and Subramani, Nishant and Xu, Wei and Yang, Diyi and Yerukola, Akhila and Zhou, Jiawei}, editor = {Bosselut, Antoine and Durmus, Esin and Gangal, Varun Prashant and Gehrmann, Sebastian and Jernite, Yacine and Perez-Beltrachini, Laura and Shaikh, Samira and Xu, Wei}, month = aug, year = {2021}, pages = {96--120}, }
@article{nasar_named_2021, title = {Named {Entity} {Recognition} and {Relation} {Extraction}: {State}-of-the-{Art}}, volume = {54}, issn = {0360-0300}, shorttitle = {Named {Entity} {Recognition} and {Relation} {Extraction}}, url = {https://doi.org/10.1145/3445965}, doi = {10.1145/3445965}, abstract = {With the advent of Web 2.0, there exist many online platforms that result in massive textual-data production. With ever-increasing textual data at hand, it is of immense importance to extract information nuggets from this data. One approach towards effective harnessing of this unstructured textual data could be its transformation into structured text. Hence, this study aims to present an overview of approaches that can be applied to extract key insights from textual data in a structured way. For this, Named Entity Recognition and Relation Extraction are being majorly addressed in this review study. The former deals with identification of named entities, and the latter deals with problem of extracting relation between set of entities. This study covers early approaches as well as the developments made up till now using machine learning models. Survey findings conclude that deep-learning-based hybrid and joint models are currently governing the state-of-the-art. It is also observed that annotated benchmark datasets for various textual-data generators such as Twitter and other social forums are not available. This scarcity of dataset has resulted into relatively less progress in these domains. Additionally, the majority of the state-of-the-art techniques are offline and computationally expensive. Last, with increasing focus on deep-learning frameworks, there is need to understand and explain the under-going processes in deep architectures.}, number = {1}, urldate = {2025-01-01}, journal = {ACM Comput. Surv.}, author = {Nasar, Zara and Jaffry, Syed Waqar and Malik, Muhammad Kamran}, month = feb, year = {2021}, pages = {20:1--20:39}, }
@misc{gebru_datasheets_2021, title = {Datasheets for {Datasets}}, url = {http://arxiv.org/abs/1803.09010}, doi = {10.48550/arXiv.1803.09010}, abstract = {The machine learning community currently has no standardized process for documenting datasets, which can lead to severe consequences in high-stakes domains. To address this gap, we propose datasheets for datasets. In the electronics industry, every component, no matter how simple or complex, is accompanied with a datasheet that describes its operating characteristics, test results, recommended uses, and other information. By analogy, we propose that every dataset be accompanied with a datasheet that documents its motivation, composition, collection process, recommended uses, and so on. Datasheets for datasets will facilitate better communication between dataset creators and dataset consumers, and encourage the machine learning community to prioritize transparency and accountability.}, urldate = {2024-09-03}, publisher = {arXiv}, author = {Gebru, Timnit and Morgenstern, Jamie and Vecchione, Briana and Vaughan, Jennifer Wortman and Wallach, Hanna and Daumé III, Hal and Crawford, Kate}, month = dec, year = {2021}, note = {arXiv:1803.09010 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Databases, Computer Science - Machine Learning}, }
@inproceedings{singh_pilot_2021, address = {Punta Cana, Dominican Republic (online)}, title = {A {Pilot} {Study} for {BERT} {Language} {Modelling} and {Morphological} {Analysis} for {Ancient} and {Medieval} {Greek}}, url = {https://aclanthology.org/2021.latechclfl-1.15}, doi = {10.18653/v1/2021.latechclfl-1.15}, abstract = {This paper presents a pilot study to automatic linguistic preprocessing of Ancient and Byzantine Greek, and morphological analysis more specifically. To this end, a novel subword-based BERT language model was trained on the basis of a varied corpus of Modern, Ancient and Post-classical Greek texts. Consequently, the obtained BERT embeddings were incorporated to train a fine-grained Part-of-Speech tagger for Ancient and Byzantine Greek. In addition, a corpus of Greek Epigrams was manually annotated and the resulting gold standard was used to evaluate the performance of the morphological analyser on Byzantine Greek. The experimental results show very good perplexity scores (4.9) for the BERT language model and state-of-the-art performance for the fine-grained Part-of-Speech tagger for in-domain data (treebanks containing a mixture of Classical and Medieval Greek), as well as for the newly created Byzantine Greek gold standard data set. The language models and associated code are made available for use at https://github.com/pranaydeeps/Ancient-Greek-BERT}, urldate = {2023-10-05}, booktitle = {Proceedings of the 5th {Joint} {SIGHUM} {Workshop} on {Computational} {Linguistics} for {Cultural} {Heritage}, {Social} {Sciences}, {Humanities} and {Literature}}, publisher = {Association for Computational Linguistics}, author = {Singh, Pranaydeep and Rutten, Gorik and Lefever, Els}, month = nov, year = {2021}, pages = {128--137}, }
@misc{sprugnoli_sentiment_2021, title = {Sentiment {Analysis} for {Latin}: a {Journey} from {Seneca} to {Thomas} {Aquinas}}, shorttitle = {Sentiment {Analysis} for {Latin}}, url = {https://zenodo.org/record/4575431#.YKelWoMzbJw}, abstract = {While the main applications of resources and tools for sentiment analysis typically fall within the scope of fields like customer experience and social media monitoring, there is an increasing interest in extending their range to texts written in ancient and historical languages. Such interest mirrors the substantial growth of the area dedicated to building and using linguistic resources for these languages, which are essential for accessing and understanding the Classical tradition. In this talk, we will present the methodology we followed to create and evaluate a new set of Latin sentiment lexicons, and the process of inclusion of a prior polarity lexicon of Latin lemmas in a knowledge base of interoperable linguistic resources developed within the ERC project “LiLa: Linking Latin”. We will discuss the main challenges we face when working with ancient languages (e.g., lack of native speakers, limited amount of data, unusual textual genres for the sentiment analysis task, such as philosophical or documentary texts) and we will describe two use cases underscoring the importance of an interdisciplinary approach combining computational linguistics, semantic web and humanities practices.}, urldate = {2021-05-21}, author = {Sprugnoli, Rachele}, month = mar, year = {2021}, doi = {10.5281/zenodo.4575431}, keywords = {computational linguistics, latin language, sentiment analysis}, }
@article{buccheri_semantic_2021, title = {Semantic {Analysis} and {Frequency} {Effects} of {Conceptual} {Metaphors} of {Emotions} in {Latin} : {From} a {Corpus}-{Based} {Approach} to a {Dictionary} of {Latin} {Metaphors}}, volume = {20}, url = {https://doi.org/10.1515/joll-2021-2002}, doi = {10.1515/joll-2021-2002}, language = {English}, number = {2}, journal = {Journal of Latin linguistics}, author = {Buccheri, Alessandro and De Felice, Irene and Fedriani, Chiara and Short, William M.}, year = {2021}, pages = {163--189}, }
@misc{korkiakangas_late_2021, series = {Corpora}, title = {Late {Latin} {Charter} {Treebank}: contents and annotation}, url = {https://researchportal.helsinki.fi/en/publications/late-latin-charter-treebank-contents-and-annotation}, number = {16}, author = {Korkiakangas, T.}, year = {2021}, }
@article{czeti_structure_2021, title = {The structure of narrative in the story of {Baucis} and {Philemon}}, volume = {61}, language = {English}, journal = {Acta Antiqua Academiae Scientiarum Hungaricae}, author = {Czeti, István}, year = {2021}, pages = {243--267}, }
@incollection{tahmasebi_lexical_2021, series = {Language {Variation}}, title = {Lexical semantic change for {Ancient} {Greek} and {Latin}}, copyright = {Copyright (c) 2021 Nina Tahmasebi, Lars Borin, Adam Jatowt, Yang Xu, Simon Hengchen (Volume Editor)}, isbn = {978-3-96110-312-6}, url = {https://langsci-press.org/catalog/view/303/3035/2382-1}, abstract = {Change and its precondition, variation, are inherent in languages. Over time, new words enter the lexicon, others become obsolete, and existing words acquire new senses. Associating a word with its correct meaning in its historical context is a central challenge in diachronic research. Historical corpora of classical languages, such as Ancient Greek and Latin, typically come with rich metadata, and existing models are limited by their inability to exploit contextual information beyond the document timestamp. While embedding-based methods feature among the current state of the art systems, they are lacking in their interpretative power. In contrast, Bayesian models provide explicit and interpretable representations of semantic change phenomena. In this chapter we build on GASC, a recent computational approach to semantic change based on a dynamic Bayesian mixture model. In this model, the evolution of word senses over time is based not only on distributional information of lexical nature, but also on text genres. We provide a systematic comparison of dynamic Bayesian mixture models for semantic change with state-of-the-art embedding-based models. On top of providing a full description of meaning change over time, we show that Bayesian mixture models are highly competitive approaches to detect binary semantic change in both Ancient Greek and Latin.}, language = {en}, number = {6}, urldate = {2023-07-24}, booktitle = {Computational approaches to semantic change}, publisher = {Language Science Press}, author = {Perrone, Valerio and Hengchen, Simon and Palma, Marco and Vatri, Alessandro and Smith, Jim Q. 
and McGillivray, Barbara}, editor = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang and Hengchen, Simon}, month = feb, year = {2021}, doi = {10.5281/zenodo.5040241}, note = {Publication Title: Language Science Press}, pages = {287--310}, }
@article{gledic_survey_2021, title = {Survey of curricula: {Linguistics} and language-related degrees in {Europe}}, shorttitle = {Survey of curricula}, url = {https://zenodo.org/record/5030861}, abstract = {The needs analysis of the UPSKILLS project is the foundation for all subsequent project activities, and the survey of curricula as its first step is designed to provide insights for finetuning the interventions and materials that will be designed during the lifetime of the project, as well as for enlarging the pool of stakeholders to whom the project results will be disseminated. The survey of curricula has several steps: drawing a list of European language and linguistics degrees from international ranking websites, selecting and analyzing a representative sample of degrees based on a set of indicators agreed upon by all partners, and additional studying of a selection of degrees that the partners identified as exemplary in the context of the UPSKILLS project.}, urldate = {2023-10-02}, author = {Gledić, Jelena and Đukanović, Maja and Miličević Petrović, Maja and van der Lek, Iulianna and Assimakopoulos, Stavros}, month = jun, year = {2021}, note = {Publisher: Zenodo}, }
@article{gledic_upskills_2021, title = {{UPSKILLS} guidelines for {Learning} {Content} {Creation}}, url = {https://zenodo.org/record/8302296}, abstract = {The core of the UPSKILLS project is the production of learning content aimed at students in language- and linguistics-related fields (modern languages and cultures, translation, general linguistics, etc.) and lecturers who want to incorporate the developed content and/or add their own, into their teaching. The topics are selected in light of a comparative analysis of the current academic offer and the requirements the job market has for graduates in these areas, conducted under the UPSKILLS project. The main focus is on the knowledge and skills that are insufficiently covered in existing linguistics and language-related curricula but can open new job perspectives for students. The created learning content can be used as individual elements or as an integrated module. These guidelines are created to serve as: Reference material for UPSKILLS project partners – the partners will consult the guidelines as they create the learning content in line with the project goals. Teaching guides for those using the materials we create – those who wish to use our materials can gain insight into our approach and methodology Learning content creation guides – for those who wish to create new materials based on the model we developed under UPSKILLS}, urldate = {2023-10-02}, author = {Gledić, Jelena and Assimakopoulos, Stavros and Buchberger, Iva and Budimirović, Jelena and Đukanović, Maja and Kraš, Tihana and Podboj, Martina and Soldatić, Nađa and Vella, Michela}, month = sep, year = {2021}, note = {Publisher: Zenodo}, }
@article{linka_pain_2021, title = {Pain and the {Body} in {Corpus} {Hippocraticum}: {A} {Distributional} {Semantic} {Analysis}}, copyright = {Copyright (c) 2021}, issn = {2364-7957}, shorttitle = {Pain and the {Body} in {Corpus} {Hippocraticum}}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/81212}, doi = {10.11588/dco.2021.7.81212}, abstract = {Die Autoren der im Corpus Hippocraticum versammelten medizinischen Abhandlungen erwähnen häufig den Schmerz, seine Eigenschaften und seinen Ursprung. Gleichzeitig liefern sie jedoch keine ausdrückliche Definition oder Theorie des Schmerzes, seiner Natur und seiner Beziehung zu anderen wichtigen Aspekten der hippokratischen Medizin. Außerdem verwenden sie mindestens vier Wortfamilien, von denen man annimmt, dass sie im Altgriechischen Schmerzen bezeichnen. Dies bringt moderne Forscher zu der Frage, wie sich diese vier Schmerzwörter semantisch unterscheiden und inwieweit sie auf einer gemeinsamen Vorstellung von Schmerz beruhen. In diesem Artikel versuchen wir, diese Fragen zu beantworten, indem wir das Korpus mit Hilfe verschiedener computergestützter Textanalysemethoden analysieren, insbesondere mit Hilfe eines Ansatzes zur distributionellen semantischen Modellierung. Unsere Ergebnisse zeigen einen engen Zusammenhang zwischen einigen dieser Schmerzwörter, Körperteilen und pathologischen Zuständen. Die Ergebnisse werden außerdem mit den Erkenntnissen verglichen, die durch traditionelles genaues Lesen der Quellen gewonnen wurden.}, language = {en}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Linka, Vojtěch and Kaše, Vojtěch}, month = sep, year = {2021}, keywords = {DSM}, pages = {54--71}, }
@article{nikolaev_considerations_2021, title = {{SOME} {CONSIDERATIONS} {ON} {THE} {ATTRIBUTION} {OF} {THE} ‘{NEW} {APULEIUS}’}, volume = {71}, issn = {0009-8388, 1471-6844}, url = {https://www.cambridge.org/core/journals/classical-quarterly/article/some-considerations-on-the-attribution-of-the-new-apuleius/8F7FF4C1442452FC41CC6005E8501EB8}, doi = {10.1017/S0009838821000987}, abstract = {The ‘New Apuleius’ is a set of Latin summaries of Plato's works first published in 2016 by Justin Stover, who attributed it to Apuleius. The present article attempts to assess two key aspects of Stover's argument, viz. his reconstruction of the manuscript transmission of the new text and his use of computer-assisted stylometric techniques. The authors suggest that both strands of his argument are inconclusive. First, it is argued that the transposition of gatherings in the archetype of the Apuleian philosophica as envisaged by Stover is highly unrealistic. Second, replications of Stover's stylometric experiments show that their results are highly dependent on the particular algorithm settings and on the composition of the corpus. It is further shown that Stover's choice of highly specialized stylometric techniques is suboptimal, because popular generalist methods for statistical data analysis are demonstrably more successful in correctly identifying authors of Latin text fragments and do not support the case for Apuleius’ authorship of the new text. The authors conclude that there are no solid grounds to conclude that the ‘New Apuleius’ was indeed written by Apuleius.}, language = {en}, number = {2}, urldate = {2023-08-26}, journal = {The Classical Quarterly}, author = {Nikolaev, Dmitry and Shumilin, Mikhail}, month = dec, year = {2021}, note = {Publisher: Cambridge University Press}, keywords = {Apuleius, Burrow's Delta, attribution, computer-assisted stylometry, transmission, ‘New Apuleius’}, pages = {819--848}, }
@article{nagy_carmen_2021, title = {Carmen et {Standard} {Error}: {Computational} {Methods} in {Stylometry} for {Classical} {Latin} {Poetry}}, language = {en}, author = {Nagy, Benjamin C}, year = {2021}, }
@inproceedings{burns_profiling_2021, address = {Online}, title = {Profiling of {Intertextuality} in {Latin} {Literature} {Using} {Word} {Embeddings}}, url = {https://aclanthology.org/2021.naacl-main.389}, doi = {10.18653/v1/2021.naacl-main.389}, language = {en}, urldate = {2023-08-26}, booktitle = {Proceedings of the 2021 {Conference} of the {North} {American} {Chapter} of the {Association} for {Computational} {Linguistics}: {Human} {Language} {Technologies}}, publisher = {Association for Computational Linguistics}, author = {Burns, Patrick J. and Brofos, James A. and Li, Kyle and Chaudhuri, Pramit and Dexter, Joseph P.}, year = {2021}, pages = {4900--4907}, }
@article{de_carvalho-filho_twelve_2020, title = {Twelve tips for implementing a community of practice for faculty development}, volume = {42}, issn = {0142-159X}, url = {https://doi.org/10.1080/0142159X.2018.1552782}, doi = {10.1080/0142159X.2018.1552782}, abstract = {Teaching and learning practices often fail to incorporate new concepts in the ever-evolving field of medical education. Although medical education research provides new insights into curricular development, learners’ engagement, assessment methods, professional development, interprofessional education, and so forth, faculty members often struggle to modernize their teaching practices. Communities of practice (CoP) for faculty development offer an effective and sustainable approach for knowledge management and implementation of best practices. A successful CoP creates and shares knowledge in the context of a specific practice toward the development of expertise. CoPs’ collaborative nature, based on the co-creation of practical solutions to daily problems, aligns well with the goals of applying best practices in health professions education and training new faculty members. In our article, we share 12 tips for implementing a community of practice for faculty development. The tips were based on a comprehensive literature review and the authors’ experiences.}, number = {2}, urldate = {2025-02-02}, journal = {Medical Teacher}, author = {de Carvalho-Filho, Marco Antonio and Tio, René A. and Steinert, Yvonne}, month = feb, year = {2020}, pmid = {30707855}, note = {Publisher: Taylor \& Francis \_eprint: https://doi.org/10.1080/0142159X.2018.1552782}, pages = {143--149}, }
@inproceedings{foka_semantically_2020, address = {Seattle, Washington, USA}, title = {Semantically geo-annotating an ancient {Greek} "travel guide" {Itineraries}, {Chronotopes}, {Networks}, and {Linked} {Data}}, isbn = {978-1-4503-8163-5}, url = {https://dl.acm.org/doi/10.1145/3423337.3429433}, doi = {10.1145/3423337.3429433}, abstract = {Pausanias's second-century CE Periegesis Hellados presents a ten-volume grand tour of the Greek mainland. After the post-enlightenment rediscovery of ancient Greek literature, his Description of Greece proved highly influential as a guidebook to Greece's antiquities, directing travellers and archaeologists alike to uncovering and interpreting major sites, notably at Athens, Corinth and Olympia. Recent studies focusing on his Description as a narrative, however, have drawn attention to the textual construction of space, and the different ways in which space and place are conceptualised and related to each other. This paper outlines the initial work of the Digital Periegesis project, which is using semantic geo-annotation to capture and analyse the forms of space within and the spatial form of this narrative. In particular, it discusses the challenges and affordances of using geo-parsing, spatio-temporal analysis, network analysis, and Linked Open Data (LOD) for rethinking the geographies of a non-modern literary text as based more on topological connections than topographic proximity.}, language = {en}, urldate = {2025-01-26}, booktitle = {Proceedings of the 4th {ACM} {SIGSPATIAL} {Workshop} on {Geospatial} {Humanities}}, publisher = {ACM}, author = {Foka, Anna and Barker, Elton and Konstantinidou, Kyriaki and Mostofian, Nasrin and Demiroglu, O. Cenk and Kiesling, Brady and Talatas, Linda}, year = {2020}, pages = {1--9}, }
@inproceedings{sprugnoli_overview_2020, address = {Marseille, France}, title = {Overview of the {EvaLatin} 2020 {Evaluation} {Campaign}}, isbn = {979-10-95546-53-5}, url = {https://aclanthology.org/2020.lt4hala-1.16}, abstract = {This paper describes the first edition of EvaLatin, a campaign totally devoted to the evaluation of NLP tools for Latin. The two shared tasks proposed in EvaLatin 2020, i. e. Lemmatization and Part-of-Speech tagging, are aimed at fostering research in the field of language technologies for Classical languages. The shared dataset consists of texts taken from the Perseus Digital Library, processed with UDPipe models and then manually corrected by Latin experts. The training set includes only prose texts by Classical authors. The test set, alongside with prose texts by the same authors represented in the training set, also includes data relative to poetry and to the Medieval period. This also allows us to propose the Cross-genre and Cross-time subtasks for each task, in order to evaluate the portability of NLP tools for Latin across different genres and time periods. The results obtained by the participants for each task and subtask are presented and discussed.}, language = {English}, urldate = {2024-08-31}, booktitle = {Proceedings of {LT4HALA} 2020 - 1st {Workshop} on {Language} {Technologies} for {Historical} and {Ancient} {Languages}}, publisher = {European Language Resources Association (ELRA)}, author = {Sprugnoli, Rachele and Passarotti, Marco and Cecchini, Flavio Massimiliano and Pellegrini, Matteo}, editor = {Sprugnoli, Rachele and Passarotti, Marco}, month = may, year = {2020}, pages = {105--110}, }
@misc{berra_aurelberrastopwords_2020, title = {aurelberra/stopwords v2.3.0}, copyright = {Open Access}, url = {https://zenodo.org/record/1165205}, abstract = {This repository contains Ancient Greek and Latin stopwords for textual analysis.}, urldate = {2024-05-17}, publisher = {Zenodo}, author = {Berra, Aurélien}, month = may, year = {2020}, doi = {10.5281/ZENODO.1165205}, }
@techreport{stoeckel_voting_2020, title = {Voting for {POS} {Tagging} of {Latin} {Texts} : {Using} the {Flair} of {FLAIR} to {Better} {Ensemble} {Classifiers} by {Example} of {Latin}}, url = {https://aclanthology.org/2020.lt4hala-1.21.pdf}, number = {Proceedings of the LREC 2020 1st Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA 2020)}, author = {Stoeckel, Manuel and Henlein, Alexander and Hemati, Wahed and Mehler, Alexander}, year = {2020}, pages = {130--135}, }
@inproceedings{straka_udpipe_2020, title = {{UDPipe} at {EvaLatin} 2020: {Contextualized} embeddings and treebank embeddings}, url = {https://arxiv.org/pdf/2006.03687.pdf}, booktitle = {Proceedings of {LT4HALA} 2020-1st {Workshop} on {Language} {Technologies} for {Historical} and {Ancient} {Languages}}, author = {Straka, Milan and Straková, Jana}, year = {2020}, pages = {124--129}, }
@article{gorman_author_2020, title = {Author {Identification} of {Short} {Texts} {Using} {Dependency} {Treebanks} without {Vocabulary}}, volume = {35}, doi = {10.1093/llc/fqz070}, number = {4}, journal = {Digital scholarship in the humanities}, author = {Gorman, Robert J.}, year = {2020}, pages = {812--825}, }
@inproceedings{yeruva_interpretation_2020, address = {Online}, title = {Interpretation of {Sentiment} {Analysis} in {Aeschylus}'s {Greek} {Tragedy}}, url = {https://www.aclweb.org/anthology/2020.latechclfl-1.17}, abstract = {Recent advancements in NLP and machine learning have created unique challenges and opportunities for digital humanities research. In particular, there are ample opportunities for NLP and machine learning researchers to analyze data from literary texts and to broaden our understanding of human sentiment in classical Greek tragedy. In this paper, we will explore the challenges and benefits from the human and machine collaboration for sentiment analysis in Greek tragedy and address some open questions related to the collaborative annotation for the sentiments in literary texts. We focus primarily on (i) an analysis of the challenges in sentiment analysis tasks for humans and machines, and (ii) whether consistent annotation results are generated from the multiple human annotators and multiple machine annotators. For human annotators, we have used a survey-based approach with about 60 college students. We have selected three popular sentiment analysis tools for machine annotators, including VADER, CoreNLP's sentiment annotator, and TextBlob. We have conducted a qualitative and quantitative evaluation and confirmed our observations on sentiments in Greek tragedy.}, urldate = {2021-05-21}, booktitle = {Proceedings of the {The} 4th {Joint} {SIGHUM} {Workshop} on {Computational} {Linguistics} for {Cultural} {Heritage}, {Social} {Sciences}, {Humanities} and {Literature}}, publisher = {International Committee on Computational Linguistics}, author = {Yeruva, Vijaya Kumari and ChandraShekar, Mayanka and Lee, Yugyung and Rydberg-Cox, Jeff and Blanton, Virginia and Oyler, Nathan A}, month = dec, year = {2020}, pages = {138--146}, }
@inproceedings{yeruva_interpretation_hitl_2020, title = {Interpretation of {Sentiment} {Analysis} with {Human}-in-the-{Loop}}, isbn = {1-72816-251-3}, publisher = {IEEE}, author = {Yeruva, Vijaya Kumari and Chandrashekar, Mayanka and Lee, Yugyung and Rydberg-Cox, Jeff and Blanton, Virginia and Oyler, Nathan A}, year = {2020}, pages = {3099--3108}, }
@inproceedings{sprugnoli_odi_2020, title = {Odi et {Amo}. {Creating}, {Evaluating} and {Extending} {Sentiment} {Lexicons} for {Latin}.}, url = {https://aclanthology.org/2020.lrec-1.376.pdf}, author = {Sprugnoli, Rachele and Passarotti, Marco and Corbetta, Daniela and Peverelli, Andrea}, year = {2020}, pages = {3078--3086}, }
@inproceedings{hellwig_treebank_2020, address = {Marseille, France}, title = {The {Treebank} of {Vedic} {Sanskrit}}, url = {https://aclanthology.org/2020.lrec-1.632/}, booktitle = {Proceedings of the {Twelfth} {Language} {Resources} and {Evaluation} {Conference}}, publisher = {European Language Resources Association}, author = {Hellwig, Oliver and Scarlata, Salvatore and Widmer, Paul}, year = {2020}, pages = {5137--5146}, }
@inproceedings{sprugnoli_overview_lt4hala_2020, address = {Marseille, France}, title = {Overview of the {EvaLatin} 2020 {Evaluation} {Campaign}}, url = {https://aclanthology.org/2020.lt4hala-1.16/}, booktitle = {Proceedings of {LT4HALA} 2020 - 1st {Workshop} on {Language} {Technologies} for {Historical} and {Ancient} {Languages}}, publisher = {European Language Resources Association (ELRA)}, author = {Sprugnoli, Rachele and Passarotti, Marco and Cecchini, Flavio Massimiliano and Pellegrini, Matteo}, year = {2020}, pages = {105--110}, }
@article{papantoniou_nlp_2020, title = {{NLP} for the {Greek} {Language}: {A} {Brief} {Survey}.}, doi = {10.1145/3411408.3411410}, journal = {11th Hellenic Conference on Artificial Intelligence (SETN 2020)}, author = {Papantoniou, Katerina and Tzitzikas, Yannis}, year = {2020}, }
@techreport{pellegrini_using_2020, title = {Using {LatInfLexi} for an {Entropy}-{Based} {Assessment} of {Predictability} in {Latin} {Inflection}}, number = {Proceedings of the LREC 2020 1st Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA 2020)}, author = {Pellegrini, Matteo}, year = {2020}, pages = {37--46}, }
@article{vayansky_review_2020, title = {A review of topic modeling methods}, volume = {94}, url = {https://www.researchgate.net/profile/Sathish_Kumar50/publication/342288300_A_review_of_topic_modeling_methods/links/5ef14381299bf1faac6f22f9/A-review-of-topic-modeling-methods.pdf}, doi = {10.1016/j.is.2020.101582}, abstract = {Topic modeling is a popular analytical tool for evaluating data. Numerous methods of topic modeling have been developed which consider many kinds of relationships and restrictions within datasets; however, these methods are not frequently employed. Instead many researchers gravitate to Latent Dirichlet Analysis, which although flexible and adaptive, is not always suited for modeling more complex data relationships. We present different topic modeling approaches capable of dealing with correlation between topics, the changes of topics over time, as well as the ability to handle short texts such as encountered in social media or sparse text data. We also briefly review the algorithms which are used to optimize and infer parameters in topic modeling, which is essential to producing meaningful results regardless of method. We believe this review will encourage more diversity when performing topic modeling and help determine what topic modeling method best suits the user needs.}, journal = {Information Systems}, author = {Vayansky, Ike and Kumar, Sathish}, month = jun, year = {2020}, pages = {1--32}, }
@inproceedings{long_what_2020, title = {What is {AI} literacy? {Competencies} and design considerations}, url = {https://dl.acm.org/doi/pdf/10.1145/3313831.3376727}, author = {Long, Duri and Magerko, Brian}, year = {2020}, pages = {1--16}, }
@book{florio-hansen_digitalisierung_2020, title = {Digitalisierung, {Künstliche} {Intelligenz} und {Robotik}: {Eine} {Einführung} für {Schule} und {Unterricht}}, isbn = {978-3-8252-5429-2}, shorttitle = {Digitalisierung, {Künstliche} {Intelligenz} und {Robotik}}, url = {https://books.google.it/books?hl=en&lr=&id=HHUGEAAAQBAJ}, language = {de}, publisher = {UTB}, author = {Florio-Hansen, Inez De}, month = nov, year = {2020}, note = {Google-Books-ID: HHUGEAAAQBAJ}, }
@book{richards_fundamentals_2020, title = {Fundamentals of {Software} {Architecture}: {An} {Engineering} {Approach}}, isbn = {978-1-4920-4342-3}, shorttitle = {Fundamentals of {Software} {Architecture}}, url = {https://books.google.de/books?hl=de&lr=&id=xa7MDwAAQBAJ}, abstract = {Salary surveys worldwide regularly place software architect in the top 10 best jobs, yet no real guide exists to help developers become architects. Until now. This book provides the first comprehensive overview of software architecture’s many aspects. Aspiring and existing architects alike will examine architectural characteristics, architectural patterns, component determination, diagramming and presenting architecture, evolutionary architecture, and many other topics.Mark Richards and Neal Ford—hands-on practitioners who have taught software architecture classes professionally for years—focus on architecture principles that apply across all technology stacks. You’ll explore software architecture in a modern light, taking into account all the innovations of the past decade.This book examines:Architecture patterns: The technical basis for many architectural decisionsComponents: Identification, coupling, cohesion, partitioning, and granularitySoft skills: Effective team management, meetings, negotiation, presentations, and moreModernity: Engineering practices and operational approaches that have changed radically in the past few yearsArchitecture as an engineering discipline: Repeatable results, metrics, and concrete valuations that add rigor to software architecture}, language = {en}, publisher = {"O'Reilly Media, Inc."}, author = {Richards, Mark and Ford, Neal}, month = jan, year = {2020}, note = {Google-Books-ID: xa7MDwAAQBAJ}, keywords = {Computers / Software Development \& Engineering / General, Computers / Software Development \& Engineering / Systems Analysis \& Design, Computers / Software Development \& Engineering / Tools, Computers / Systems Architecture / Distributed Systems \& 
Computing}, }
@article{diemke_alkibiades_2020, title = {Alkibiades, {Pyrrhos} und {Alexander}: {Eine} {Untersuchung} zu {Emotionen} und {Gewalt} in den {Viten} {Plutarchs} unter {Verwendung} digitaler {Methoden}}, copyright = {Copyright (c) 2020 Digital Classics Online}, issn = {2364-7957}, shorttitle = {Alkibiades, {Pyrrhos} und {Alexander}}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/77663}, doi = {10.11588/dco.2020.2.77663}, abstract = {Forscher haben die Bedeutung von Emotionen in Plutarchs Biographien weitgehend ignoriert, obwohl Emotionen für die Entstehung von Gewalt eine entscheidende Rolle spielen. Mit Hilfe von ERIS, einem Hamburger Informationssystem zur Darstellung griechischer und römischer Gewalt, werden Gewaltdarstellungen, die auf ein emotionales Motiv zurückgehen, in den Biographien von Alkibiades, Pyrrhos und Alexander untersucht. Durch eine Visualisierung lassen sich Muster und Beziehungen zwischen den Objekten und Merkmalen schneller erkennen. Die Ergebnisse zeigen, wie digitale Werkzeuge dazu beitragen können, neue Beziehungen zwischen Opfer, Täter, Waffe, Gewaltmethode und Motiv aufzudecken. Darüber hinaus zeigen die Ergebnisse, wie stark der wachsende Machteinfluss und das Fehlen von Paideia das Gewaltverhalten und die fehlende Selbstkontrolle der Protagonisten forcieren können. Die Untersuchung soll das Erkenntnispotenzial und den Mehrwert, der aus der Anwendung von digitaler und hermeneutischer Analyse resultiert, aufzeigen.}, language = {de}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Diemke, Justine}, month = dec, year = {2020}, keywords = {Informationssystem}, pages = {57--74}, }
@inproceedings{nicolosi_clarin-it_2020, title = {{CLARIN}-{IT} and the {Definition} of a {Digital} {Critical} {Edition} for {Ancient} {Greek} {Poetry}}, url = {https://ep.liu.se/en/conference-article.aspx?series=ecp&issue=172&Article_No=11}, doi = {10.3384/ecp2020172011}, abstract = {Ancient Greek studies, and Classics in general, is a perfect field of investigation in Digital Humanities. Indeed, DH approaches could become a means of building models for complex realities, analyzing them with computational methods and sharing the results with a broader public. Ancient texts have a complex tradition, which includes many witnesses (texts that handed down other texts) and different typologies of supports (papyri, manuscripts, and epigraphs). These texts are the basis of all European Literatures and it is crucial to spread their knowledge, in a reliable and easy way. Our project on ancient Greek fragmentary poetry (DEA - Digital Edition of Archilochus: New models and tools for authoring, editing and indexing an ancient Greek fragmentary author), growing out of the existing experience, tries to define a TEI-based digital critical edition combined with NLP techniques and semantic web technologies. Our goal is to provide a complete and reliable tool for scholars, suitable for critical studies in Classics, and a user-friendly environment also for non-specialist users. The project represents one of the attempts within the context of CLARIN-IT to contribute to the wider impact of CLARIN on the specific Italian community interested in Digital Classics. It is intended to improve services in fostering new knowledge in SSH digital research and sustaining the existing one.}, language = {en}, urldate = {2023-08-26}, author = {Nicolosi, Anika and Monachini, Monica and Nova, Beatrice}, month = jul, year = {2020}, pages = {85--93}, }
@article{ribary_corpus_2020, title = {A {Corpus} {Approach} to {Roman} {Law} {Based} on {Justinian}’s {Digest}}, volume = {7}, copyright = {http://creativecommons.org/licenses/by/3.0/}, issn = {2227-9709}, url = {https://www.mdpi.com/2227-9709/7/4/44}, doi = {10.3390/informatics7040044}, abstract = {Traditional philological methods in Roman legal scholarship such as close reading and strict juristic reasoning have analysed law in extraordinary detail. Such methods, however, have paid less attention to the empirical characteristics of legal texts and occasionally projected an abstract framework onto the sources. The paper presents a series of computer-assisted methods to open new frontiers of inquiry. Using a Python coding environment, we have built a relational database of the Latin text of the Digest, a historical sourcebook of Roman law compiled under the order of Emperor Justinian in 533 CE. Subsequently, we investigated the structure of Roman law by automatically clustering the sections of the Digest according to their linguistic profile. Finally, we explored the characteristics of Roman legal language according to the principles and methods of computational distributional semantics. Our research has discovered an empirical structure of Roman law which arises from the sources themselves and complements the dominant scholarly assumption that Roman law rests on abstract structures. By building and comparing Latin word embeddings models, we were also able to detect a semantic split in words with general and legal sense. 
These investigations point to a practical focus in Roman law which is consistent with the view that ancient law schools were more interested in training lawyers for practice rather than in philosophical neatness.}, language = {en}, number = {4}, urldate = {2023-08-26}, journal = {Informatics}, author = {Ribary, Marton and McGillivray, Barbara}, month = dec, year = {2020}, note = {Number: 4 Publisher: Multidisciplinary Digital Publishing Institute}, keywords = {Digest, Latin, LatinISE, Python, Roman law, clustering, computational linguistics, corpus linguistics, distributional semantics, word embeddings}, pages = {44}, }
@article{burns_ensemble_2020, title = {Ensemble lemmatization with the {Classical} {Language} {Toolkit}}, volume = {58}, copyright = {Copyright (c) 2020 Studi e Saggi Linguistici}, issn = {2281-9142}, url = {https://studiesaggilinguistici.it/ssl/article/view/273}, doi = {10.4454/ssl.v58i1.273}, abstract = {Because of the less-resourced nature of historical languages, non-standard solutions are often required for natural language processing tasks. This article introduces one such solution for historical-language lemmatization, that is the Ensemble lemmatizer for the Classical Language Toolkit, an open-source Python package that supports NLP research for historical languages. Ensemble lemmatization is the most recent development at CLTK in the repurposing and refactoring of an existing method designed for one task, specifically the backoff method as used for part-of-speech tagging, for use in a different task, namely lemmatization. This article argues for the benefits of ensemble lemmatization, specifically, flexible tool construction and the use of all available information to reach tagging decisions, and presents two use cases.}, language = {en}, number = {1}, urldate = {2023-08-26}, journal = {Studi e Saggi Linguistici}, author = {Burns, Patrick J.}, month = sep, year = {2020}, note = {Number: 1}, pages = {157--176}, }
@article{sprugnoli_building_2020, title = {Building and {Comparing} {Lemma} {Embeddings} for {Latin}. {Classical} {Latin} versus {Thomas} {Aquinas}}, volume = {6}, copyright = {https://creativecommons.org/licenses/by-nc-nd/4.0/}, issn = {2499-4553}, url = {https://journals.openedition.org/ijcol/624}, doi = {10.4000/ijcol.624}, abstract = {This paper presents a new set of lemma embeddings for the Latin language. Embeddings are trained on a manually annotated corpus of texts belonging to the Classical era: different models, architectures and dimensions are tested and evaluated using a novel benchmark for the synonym selection task. In addition, we release vectors pre-trained on the “Opera Maiora” by Thomas Aquinas, thus providing a resource to analyze Latin in a diachronic perspective. The embeddings built upon the two training corpora are compared to each other to support diachronic lexical studies. The words showing the highest usage change between the two corpora are reported and a selection of them is discussed.}, language = {en}, number = {1}, urldate = {2023-08-26}, journal = {IJCoL. Italian Journal of Computational Linguistics}, author = {Sprugnoli, Rachele and Moretti, Giovanni and Passarotti, Marco}, month = jun, year = {2020}, note = {Number: 1 Publisher: Accademia University Press}, pages = {29--45}, }
@misc{bamman_latin_2020, title = {Latin {BERT}: {A} {Contextual} {Language} {Model} for {Classical} {Philology}}, shorttitle = {Latin {BERT}}, url = {http://arxiv.org/abs/2009.10053}, abstract = {We present Latin BERT, a contextual language model for the Latin language, trained on 642.7 million words from a variety of sources spanning the Classical era to the 21st century. In a series of case studies, we illustrate the affordances of this language-specific model both for work in natural language processing for Latin and in using computational methods for traditional scholarship: we show that Latin BERT achieves a new state of the art for part-of-speech tagging on all three Universal Dependency datasets for Latin and can be used for predicting missing text (including critical emendations); we create a new dataset for assessing word sense disambiguation for Latin and demonstrate that Latin BERT outperforms static word embeddings; and we show that it can be used for semanticallyinformed search by querying contextual nearest neighbors. We publicly release trained models to help drive future work in this space.}, language = {en}, urldate = {2023-08-26}, publisher = {arXiv}, author = {Bamman, David and Burns, Patrick J.}, month = sep, year = {2020}, note = {arXiv:2009.10053 [cs]}, keywords = {Computer Science - Computation and Language}, }
@incollection{burns_building_2019, address = {Berlin \& Boston}, title = {Building a {Text} {Analysis} {Pipeline} for {Classical} {Languages}}, url = {https://doi.org/10.1515/9783110599572-010}, booktitle = {Digital {Classical} {Philology}: {Ancient} {Greek} and {Latin} in the {Digital} {Revolution}}, publisher = {De Gruyter}, author = {Burns, Patrick J.}, editor = {Berti, Monica}, year = {2019}, pages = {159--176}, }
@article{min_modeling_2019, title = {Modeling narrative structure and dynamics with networks, sentiment analysis, and topic modeling}, volume = {14}, url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0226025}, number = {12}, journal = {PLoS ONE}, author = {Min, Semi and Park, Juyong}, year = {2019}, }
@inproceedings{sprugnoli_vir_2019, title = {Vir is to {Moderatus} as {Mulier} is to {Intemperans}. {Lemma} {Embeddings} for {Latin}}, url = {https://ceur-ws.org/Vol-2481/paper69.pdf}, author = {Sprugnoli, Rachele and Passarotti, Marco and Moretti, Giovanni}, year = {2019}, }
@inproceedings{mitchell_model_2019, title = {Model {Cards} for {Model} {Reporting}}, url = {http://arxiv.org/abs/1810.03993}, doi = {10.1145/3287560.3287596}, abstract = {Trained machine learning models are increasingly used to perform high-impact tasks in areas such as law enforcement, medicine, education, and employment. In order to clarify the intended use cases of machine learning models and minimize their usage in contexts for which they are not well suited, we recommend that released models be accompanied by documentation detailing their performance characteristics. In this paper, we propose a framework that we call model cards, to encourage such transparent model reporting. Model cards are short documents accompanying trained machine learning models that provide benchmarked evaluation in a variety of conditions, such as across different cultural, demographic, or phenotypic groups (e.g., race, geographic location, sex, Fitzpatrick skin type) and intersectional groups (e.g., age and race, or sex and Fitzpatrick skin type) that are relevant to the intended application domains. Model cards also disclose the context in which models are intended to be used, details of the performance evaluation procedures, and other relevant information. While we focus primarily on human-centered machine learning models in the application fields of computer vision and natural language processing, this framework can be used to document any trained machine learning model. To solidify the concept, we provide cards for two supervised models: One trained to detect smiling faces in images, and one trained to detect toxic comments in text. We propose model cards as a step towards the responsible democratization of machine learning and related AI technology, increasing transparency into how well AI technology works. 
We hope this work encourages those releasing trained machine learning models to accompany model releases with similar detailed evaluation numbers and other relevant documentation.}, urldate = {2024-06-01}, booktitle = {Proceedings of the {Conference} on {Fairness}, {Accountability}, and {Transparency}}, author = {Mitchell, Margaret and Wu, Simone and Zaldivar, Andrew and Barnes, Parker and Vasserman, Lucy and Hutchinson, Ben and Spitzer, Elena and Raji, Inioluwa Deborah and Gebru, Timnit}, month = jan, year = {2019}, note = {arXiv:1810.03993 [cs]}, keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning}, pages = {220--229}, }
@article{rodda_vector_2019, title = {Vector space models of {Ancient} {Greek} word meaning, and a case study on {Homer}}, volume = {60}, issn = {1248-9433}, url = {https://aclanthology.org/2019.tal-3.4.pdf}, number = {3}, journal = {Traitement Automatique des Langues}, author = {Rodda, M. and Probert, Philomen and McGillivray, Barbara}, year = {2019}, note = {Publisher: Lavoisier}, }
@inproceedings{devlin_bert_2019, address = {Minneapolis, Minnesota}, title = {{BERT}: {Pre}-training of {Deep} {Bidirectional} {Transformers} for {Language} {Understanding}}, shorttitle = {{BERT}}, url = {https://www.aclweb.org/anthology/N19-1423}, doi = {10.18653/v1/N19-1423}, abstract = {We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models (Peters et al., 2018a; Radford et al., 2018), BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5 (7.7 point absolute improvement), MultiNLI accuracy to 86.7\% (4.6\% absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement).}, urldate = {2020-11-25}, booktitle = {Proceedings of the 2019 {Conference} of the {North} {American} {Chapter} of the {Association} for {Computational} {Linguistics}: {Human} {Language} {Technologies}, {Volume} 1 ({Long} and {Short} {Papers})}, publisher = {Association for Computational Linguistics}, author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, month = jun, year = {2019}, pages = {4171--4186}, }
@inproceedings{bolt_stylometry_2019, address = {Hong Kong, China}, title = {A {Stylometry} {Toolkit} for {Latin} {Literature}}, booktitle = {Proceedings of the 2019 {Conference} on {Empirical} {Methods} in {Natural} {Language} {Processing} and the 9th {International} {Joint} {Conference} on {Natural} {Language} {Processing} ({EMNLP}-{IJCNLP}): {System} {Demonstrations}}, publisher = {Association for Computational Linguistics}, author = {Bolt, Thomas J. and Flynt, Jeffrey H. and Chaudhuri, Pramit and Dexter, Joseph P.}, year = {2019}, pages = {205--210}, }
@book{schubert_platon_2019, title = {Platon digital: {Tradition} und {Rezeption}}, isbn = {978-3-947450-07-7}, shorttitle = {Platon digital}, url = {https://nbn-resolving.org/urn:nbn:de:101:1-2019070314102240510161}, abstract = {Platon ist nach Homer der antike Autor mit der reichhaltigsten Rezeption vom Altertum über das Mittelalter bis in die Neuzeit. Gleichwohl und gerade aus diesem Grund ist diese bisher allenfalls bruchstückhaft aufgearbeitet worden. Die Autoren versuchen, diesem alten Ziel geisteswissenschaftlicher Forschung auf neuen Wegen näherzukommen, indem sie eine informationswissenschaftliche Perspektive auf Platon und seine Rezeption anwenden. Dazu sind innovative Methoden der Paraphrasensuche entwickelt worden, um diese auch als Methode altertumswissenschaftlich und kulturwissenschaftlich interessierter Forschung zu etablieren.}, language = {German}, urldate = {2020-02-17}, author = {Schubert, Charlotte and Molitor, Paul and Ritter, Jörg and Scharloth, Joachim and Sier, Kurt}, year = {2019}, keywords = {reference}, }
@book{berti_digital_2019, address = {Berlin}, series = {Age of {Access}? {Grundfragen} der {Informationsgesellschaft}}, title = {Digital classical philology: {Ancient} {Greek} and {Latin} in the digital revolution}, volume = {10}, isbn = {978-3-11-059678-6}, abstract = {Thanks to the digital revolution, even a traditional discipline like philology has been enjoying a renaissance within academia and beyond. Decades of work have been producing groundbreaking results, raising new research questions and creating innovative educational resources. This book describes the rapidly developing state of the art of digital philology with a focus on Ancient Greek and Latin, the classical languages of Western culture. Contributions cover a wide range of topics about the accessibility and analysis of Greek and Latin sources. The discussion is organized in five sections concerning open data of Greek and Latin texts; catalogs and citations of authors and works; data entry, collection and analysis for classical philology; critical editions and annotations of sources; and finally linguistic annotations and lexical databases. As a whole, the volume provides a comprehensive outline of an emergent research field for a new generation of scholars and students, explaining what is reachable and analyzable that was not before in terms of technology and accessibility.}, publisher = {Walter de Gruyter \& Co}, author = {Berti, Monica}, year = {2019}, keywords = {Humanités digitales, Numérisation, Philologie classique, reference}, }
@article{vainio_reconsidering_2019, title = {Reconsidering {Authorship} in the {Ciceronian} {Corpus} through {Computational} {Authorship} {Attribution}}, volume = {3}, url = {https://ojs.unito.it/index.php/COL/article/view/3518/3182}, language = {en}, number = {1}, journal = {Ciceroniana online}, author = {Vainio, Raija and Välimäki, Reima and Vesanto, Aleksi and Hella, Anni and Kaartinen, Marjo and Immonen, Teemu}, year = {2019}, pages = {15--48}, }
@techreport{keersmaekers_creating_2019, title = {Creating, {Enriching} and {Valorizing} {Treebanks} of {Ancient} {Greek}.}, url = {https://syntaxfest.github.io/syntaxfest19/proceedings/papers/paper_68.pdf}, institution = {Association for Computational Linguistics (ACL)}, author = {Keersmaekers, Alek and Mercelis, Wouter and Swaelens, Colin and Van Hal, Toon}, year = {2019}, pages = {109--117}, }
@misc{beyer_teaching_2019, address = {Berlin}, type = {Talk}, title = {Teaching {Digital} {Literacy} – {Interpretieren} in einer computergestützten {Lehr}-/{Lernumgebung}}, url = {https://doi.org/10.5281/zenodo.3674815}, language = {Deutsch}, author = {Beyer, Andrea and Reichetanz, Paul}, month = mar, year = {2019}, doi = {10.5281/zenodo.3674815}, keywords = {reference}, }
@inproceedings{franzini_nunc_2019, title = {Nunc {Est} {Aestimandum}: {Towards} an {Evaluation} of the {Latin} {WordNet}}, url = {https://www.researchgate.net/profile/Greta-Franzini-2/publication/336799230_Nunc_Est_Aestimandum_Towards_an_Evaluation_of_the_Latin_WordNet/links/5db2be42299bf111d4c83184/Nunc-Est-Aestimandum-Towards-an-Evaluation-of-the-Latin-WordNet.pdf}, author = {Franzini, Greta and Peverelli, Andrea and Ruffolo, Paolo and Passarotti, Marco and Sanna, Helena and Signoroni, Edoardo and Ventura, Viviana and Zampedri, Federica}, year = {2019}, }
@incollection{pockelmann_word_2019, title = {Word {Mover}’s {Distance} angewendet auf die {Paraphrasenextraktion} im {Altgriechischen}}, url = {https://books.ub.uni-heidelberg.de/propylaeum/reader/download/451/451-30-84795-1-10-20190507.pdf}, booktitle = {Platon {Digital}. {Tradition} und {Rezeption}}, publisher = {Propylaeum Heidelberg}, author = {Pöckelmann, Marcus and Ritter, Jörg and Molitor, Paul}, editor = {Schubert, Charlotte and Molitor, Paul and Ritter, Jörg and Sier, Kurt and Scharloth, Joachim}, year = {2019}, pages = {45--60}, }
@article{schubert_visualisierung_2019, title = {Visualisierung von {Textdaten}: {Die} {Falle} der {Metadaten} am {Beispiel} von {Iamblichs} {Protreptikos}}, copyright = {Copyright (c) 2019 Digital Classics Online}, issn = {2364-7957}, shorttitle = {Visualisierung von {Textdaten}}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/59356}, doi = {10.11588/dco.2019.1.59356}, abstract = {„Digital Humanities analysieren nicht nur Bilder, sondern produzieren auch neue Bilder“[1] – diese alltägliche Feststellung beleuchtet einen Prozeß, dessen Verlauf einerseits noch ganz offen ist, da diese neuen Repräsentationsmöglichkeiten epistemisch keineswegs erfaßt sind, geschweige denn, daß die Entwicklung in diesem Bereich zu stabilen Praktiken geführt hätte. Andererseits zeigt sich ein unhinterfragter Siegeslauf, der auch schon zu einem neuen Feld wie dem der Visualization Literacy geführt hat. Im vorliegenden Beitrag wird die Visualisierung anhand von Metadaten untersucht. Gerade die Metadaten sind heute im Kontext der großen Datenmengen, die als ‚Big Data‘ anfallen von größter Bedeutung. Die für die Analyse von Big Data notwendigen Aggregationen von Daten sind ohne Metadaten nicht effizient und leistungsstark durchzuführen. Über die normale Datenerfassung hinaus legen Metadaten Muster offen, die sonst nicht sichtbar wären. Dies wiederum wird über ‚Verbildlichung’ als einer heute gängigen Repräsentationsform ermöglicht: Gerade auch für Texte gilt, daß sie in praktischen Anwendungen auf der Grundlage ihrer Metadaten durch Visualisierung zu Bildern werden, die quantitativ ausgewertet können und so wiederum auch in den Forschungsdiskurs eingehen. Von diesen gängigen Vorgehen ausgehend, stellt sich die Frage, ob diese gegenwärtige Praxis wissenschaftlichen Ansprüchen genügt oder ob sich derzeit nicht vielmehr – in einer Zeit, in der Daten als das Öl oder Gold des 21. 
Jahrhunderts betrachtet werden – eine Art Goldgräberstimmung und ein entsprechend unkritisches Verhalten etablieren. [1] Kwastek 2015; vgl. Kath et al. 2015.}, language = {de}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Schubert, Charlotte}, month = jun, year = {2019}, keywords = {Protreptikos}, pages = {4--21}, }
@article{riess_violence_2019, title = {Violence and the {Sea}: {A} {Digital} {Analysis} of {Maritime} {Acts} of {Violence} {Committed} by {Alcibiades} as {Described} by {Thucydides}, {Xenophon}, and {Plutarch}}, copyright = {Copyright (c) 2020}, issn = {2364-7957}, shorttitle = {Violence and the {Sea}}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/72018}, doi = {10.11588/dco.2019.2.72018}, abstract = {Beim Vergleich der Gewaltmuster, die von Alkibiades in den Werken von Thukydides, Plutarch und Xenophon ausgeübt wurden, treten signifikante Unterschiede im Fokus der drei Autoren zutage, die durch "Eris. Das Hamburger Informationssystem über die Darstellung griechischer und römischer Gewalt" visuell dargestellt werden können. Die Tatsache, dass die Interpretation der graphischen Befunde – also der Kategorien zweiter Ordnung – dennoch zu sehr plausiblen Ergebnissen führt, zeigt, dass dieser Proof of Concept erfolgreich war. Die plausiblen Ergebnisse legen auch nahe, dass wir durch die Verwendung von Eris bisher unentdeckte Gewaltmuster bei der Untersuchung der großen Daten antiker Texte finden werden. Die Grafiken werden somit als Inspirationsquellen dienen, die neue Fragen aufwerfen, die aufgrund der großen Datenmengen noch nicht in unser Denken eingedrungen sind.}, language = {en}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Riess, Werner}, year = {2019}, keywords = {Visualisierung}, pages = {4--27}, }
@article{chaudhuri_small_2019, title = {A small set of stylometric features differentiates {Latin} prose and verse}, volume = {34}, issn = {2055-7671}, url = {https://doi.org/10.1093/llc/fqy070}, doi = {10.1093/llc/fqy070}, abstract = {Identifying the stylistic signatures characteristic of different genres is of central importance to literary theory and criticism. In this article we report a large-scale computational analysis of Latin prose and verse using a combination of quantitative stylistics and supervised machine learning. We train a set of classifiers to differentiate prose and poetry with high accuracy (\>97\%) based on a set of twenty-six text-based, primarily syntactic features and rank the relative importance of these features to identify a low-dimensional set still sufficient to achieve excellent classifier performance. This analysis demonstrates that Latin prose and verse can be classified effectively using just three top features. From examination of the highly ranked features, we observe that measures of the hypotactic style favored in Latin prose (i.e. subordinating constructions in complex sentences, such as relative clauses) are especially useful for classification.}, number = {4}, urldate = {2023-08-26}, journal = {Digital Scholarship in the Humanities}, author = {Chaudhuri, Pramit and Dasgupta, Tathagata and Dexter, Joseph P and Iyer, Krithika}, month = dec, year = {2019}, pages = {716--729}, }
@article{mcgillivray_computational_2019, title = {A computational approach to lexical polysemy in {Ancient} {Greek}}, volume = {34}, issn = {2055-7671}, url = {https://doi.org/10.1093/llc/fqz036}, doi = {10.1093/llc/fqz036}, abstract = {Language is a complex and dynamic system. If we consider word meaning, which is the scope of lexical semantics, we observe that some words have several meanings, thus displaying lexical polysemy. In this article, we present the first phase of a project that aims at computationally modelling Ancient Greek semantics over time. Our system is based on Bayesian learning and on the Diorisis Ancient Greek corpus, which we have built for this purpose. We illustrate preliminary results in light of expert annotation, and take this opportunity to discuss the role of computational systems and human analysis in a complex research area like historical semantics. On the one hand, computational approaches allow us to model large corpora of texts. On the other hand, a long and rich scholarly tradition in Ancient Greek has provided us with valuable insights into the mechanisms of semantic change (cf. e.g. Leiwo, M. (2012). Introduction: variation with multiple faces. In Leiwo, M., Halla-aho, H., and Vierros, M. (eds), Variation and Change in Greek and Latin, Helsinki: Suomen Ateenan-instituutin säätiö, pp. 1–11.). In this article, we show that these qualitative analyses can be leveraged to support and complement the computational modelling.}, number = {4}, urldate = {2023-08-26}, journal = {Digital Scholarship in the Humanities}, author = {McGillivray, Barbara and Hengchen, Simon and Lähteenoja, Viivi and Palma, Marco and Vatri, Alessandro}, month = dec, year = {2019}, pages = {893--907}, }
@misc{nagy_metre_2019, title = {Metre as a stylometric feature in {Latin} hexameter poetry}, url = {http://arxiv.org/abs/1911.12478}, abstract = {This paper demonstrates that metre is a privileged indicator of authorial style in classical Latin hexameter poetry. Using only metrical features, pairwise classification experiments are performed between 5 first-century authors (10 comparisons) using four different machine-learning models. The results showed a two-label classification accuracy of at least 95\% with samples as small as ten lines and no greater than eighty lines (up to around 500 words). These sample sizes are an order of magnitude smaller than those typically recommended for BOW ('bag of words') or n-gram approaches, and the reported accuracy is outstanding. Additionally, this paper explores the potential for novelty (forgery) detection, or 'one-class classification'. An analysis of the disputed Aldine Additamentum (Sil. Ital. Puni. 8:144-225) concludes (p=0.0013) that the metrical style differs significantly from that of the rest of the poem.}, language = {en}, urldate = {2023-08-26}, publisher = {arXiv}, author = {Nagy, Benjamin}, month = dec, year = {2019}, note = {arXiv:1911.12478 [cs, stat]}, keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning, Statistics - Applications}, }
@misc{assael_restoring_2019, title = {Restoring ancient text using deep learning: a case study on {Greek} epigraphy}, shorttitle = {Restoring ancient text using deep learning}, url = {http://arxiv.org/abs/1910.06262}, abstract = {Ancient History relies on disciplines such as Epigraphy, the study of ancient inscribed texts, for evidence of the recorded past. However, these texts, “inscriptions”, are often damaged over the centuries, and illegible parts of the text must be restored by specialists, known as epigraphists. This work presents PYTHIA, the first ancient text restoration model that recovers missing characters from a damaged text input using deep neural networks. Its architecture is carefully designed to handle longterm context information, and deal efficiently with missing or corrupted character and word representations. To train it, we wrote a nontrivial pipeline to convert PHI, the largest digital corpus of ancient Greek inscriptions, to machine actionable text, which we call PHI-ML. On PHI-ML, PYTHIA’s predictions achieve a 30.1\% character error rate, compared to the 57.3\% of human epigraphists. Moreover, in 73.5\% of cases the ground-truth sequence was among the Top-20 hypotheses of PYTHIA, which effectively demonstrates the impact of this assistive method on the field of digital epigraphy, and sets the state-of-the-art in ancient text restoration.}, language = {en}, urldate = {2023-01-26}, publisher = {arXiv}, author = {Assael, Yannis and Sommerschield, Thea and Prag, Jonathan}, month = oct, year = {2019}, note = {arXiv:1910.06262 [cs]}, keywords = {Computer Science - Computation and Language, Computer Science - Computers and Society}, }
@incollection{cayless_sustaining_2019, address = {Berlin}, series = {Age of {Access}? {Grundfragen} der {Informationsgesellschaft}}, title = {Sustaining {Linked} {Ancient} {World} {Data}}, volume = {10}, abstract = {Abstract: May 31st, 2018 marked the sixth anniversary of the Linked Ancient World Data Institute (LAWDI), a workshop funded by the US National Endowment For the Humanities. This makes it a good time to take stock of the Ancient World Linked Data initiatives that have been around for some time, as well as some that have foundered and some that are new. What makes for sustainable Linked Open Data? Why do some initiatives thrive while others fail? What resources do successful LOD sites need, and how may they be obtained? The promise of LOD is that it frees our information from the silos in which it is housed, permitting cross-system interactions that improve the quality and usefulness of the information in any single system. This article will take the broader view of the definition of Linked Data suggested by Tim Berners-Lee’s foundational “Linked Data – Design Issues” paper, as encompassing more types of data than simply RDF and other “Semantic Web” technologies. This view of LOD is pragmatic and leverages the strengths of semantic technologies while avoiding their weaknesses.}, booktitle = {Digital classical philology: {Ancient} {Greek} and {Latin} in the digital revolution}, publisher = {Walter de Gruyter \& Co}, author = {Cayless, Hugh A.}, year = {2019}, pages = {35--50}, }
@inproceedings{celano_standoff_2019, address = {New York, NY, USA}, series = {{DATeCH2019}}, title = {Standoff {Annotation} for the {Ancient} {Greek} and {Latin} {Dependency} {Treebank}}, isbn = {978-1-4503-7194-0}, url = {https://doi.org/10.1145/3322905.3322919}, doi = {10.1145/3322905.3322919}, abstract = {This contribution presents the work in progress to convert the Ancient Greek and Latin Dependency Treebank (AGLDT) into standoff annotation using PAULA XML. With an increasing number of annotations of any kind, it becomes more and more urgent that annotations related to the same texts be added standoff. Standoff annotation consists in adding any kind of annotation in separate documents, which are ultimately linked to a main text, the so-called "base text," which is meant to be unchangeable. References occur via a graph-based system of IDs, which allows an annotation layer (contained in a separate file) to be linked to another annotation layer (contained in another separate file). All the annotations/files create a labeled directed acyclic graph, whose root is represented by the base text. Standoff annotation enables easy interoperability and extension, in that single annotation layers can reference other layers of annotation independently, thus overcoming the problem of conflicting hierarchies. Moreover, standoff annotation also allows addition of different annotations of the same kind to the same text (e.g., two different interpretations of the POS tag for a given token). In the present contribution, I show how the annotations of the AGLDT can become standoff using PAULA XML, which is an open access format following the LAF principles. More precisely, I show the case study of Caesar's De Bello Civili. 
I detail the PAULA XML files created for its tokenization and sentence split, which are preliminary required to add morphosyntactic annotation.}, urldate = {2023-04-26}, booktitle = {Proceedings of the 3rd {International} {Conference} on {Digital} {Access} to {Textual} {Cultural} {Heritage}}, publisher = {Association for Computing Machinery}, author = {Celano, Giuseppe G. A.}, year = {2019}, keywords = {Ancient Greek, Latin, PAULA XML, dependency treebank, standoff annotation}, pages = {149--153}, }
@incollection{forstall_lexical_2019, address = {Cham}, title = {Lexical {Matching}: {Text} {Reuse} as {Intertextuality}}, isbn = {978-3-030-23415-7}, shorttitle = {Lexical {Matching}}, url = {https://doi.org/10.1007/978-3-030-23415-7_3}, abstract = {Words are the basic unit for intertextual search in digital humanities. While published studies employ a variety of features depending on the textual practices with which they are concerned, the vast majority are combinations and transformations of word tokens. In this chapter, we introduce the idea of intertextual discovery through text-reuse detection, and present the core process common to most contemporary analyses.}, language = {en}, urldate = {2023-04-25}, booktitle = {Quantitative {Intertextuality}: {Analyzing} the {Markers} of {Information} {Reuse}}, publisher = {Springer International Publishing}, author = {Forstall, Christopher W. and Scheirer, Walter J.}, editor = {Forstall, Christopher W. and Scheirer, Walter J.}, year = {2019}, doi = {10.1007/978-3-030-23415-7_3}, pages = {55--78}, }
@book{forstall_quantitative_2019, address = {Cham}, title = {Quantitative {Intertextuality}: {Analyzing} the {Markers} of {Information} {Reuse}}, isbn = {978-3-030-23413-3 978-3-030-23415-7}, shorttitle = {Quantitative {Intertextuality}}, url = {http://link.springer.com/10.1007/978-3-030-23415-7}, language = {en}, urldate = {2023-04-25}, publisher = {Springer International Publishing}, author = {Forstall, Christopher W. and Scheirer, Walter J.}, year = {2019}, doi = {10.1007/978-3-030-23415-7}, keywords = {Authorship Attribution, Cultural Studies, Information Retrieval, Information Reuse, Pattern Matching}, }
@inproceedings{mambrini_linked_2019, address = {Paris, France}, title = {Linked {Open} {Treebanks}. {Interlinking} {Syntactically} {Annotated} {Corpora} in the {LiLa} {Knowledge} {Base} of {Linguistic} {Resources} for {Latin}}, abstract = {Paper about the procedure of inclusion of treebanks into the LiLa Knowledge Base of Linguistic Resources for Latin.}, booktitle = {Proceedings of the 18th {International} {Workshop} on {Treebanks} and {Linguistic} {Theories} ({TLT}, {SyntaxFest} 2019)}, author = {Mambrini, Francesco and Passarotti, Marco}, year = {2019}, keywords = {Latin, Linked Data, Treebank}, pages = {74--81}, }
@inproceedings{ochab_stylometry_2019, address = {New York, NY, USA}, series = {{DATeCH2019}}, title = {Stylometry of literary papyri}, isbn = {978-1-4503-7194-0}, url = {https://doi.org/10.1145/3322905.3322930}, doi = {10.1145/3322905.3322930}, abstract = {In this paper we present the first results of stylometric analysis of literary papyri. Specifically we perform a range of tests for unsupervised clustering of authors. We scrutinise both the best classic distance-based methods as well as the state-of-the-art network community detection techniques. We report on obstacles concerning highly non-uniform distributions of text size and authorial samples combined with sparse feature space. We also note how clustering performance depends on regularisation of spelling by means of querying relevant annotations.}, urldate = {2023-04-25}, booktitle = {Proceedings of the 3rd {International} {Conference} on {Digital} {Access} to {Textual} {Cultural} {Heritage}}, publisher = {Association for Computing Machinery}, author = {Ochab, Jeremi K. and Essler, Holger}, year = {2019}, keywords = {ancient Greek, authorship attribution, papyri, stylometry}, pages = {139--142}, }
@inproceedings{feng_improving_2018, address = {Stockholm, Sweden}, title = {Improving {Low} {Resource} {Named} {Entity} {Recognition} using {Cross}-lingual {Knowledge} {Transfer}}, isbn = {978-0-9992411-2-7}, url = {https://www.ijcai.org/proceedings/2018/566}, doi = {10.24963/ijcai.2018/566}, abstract = {Neural networks have been widely used for high resource language (e.g. English) named entity recognition (NER) and have shown state-of-the-art results. However, for low resource languages, such as Dutch and Spanish, due to the limitation of resources and lack of annotated data, NER models tend to have lower performances. To narrow this gap, we investigate cross-lingual knowledge to enrich the semantic representations of low resource languages. We first develop neural networks to improve low resource word representations via knowledge transfer from high resource language using bilingual lexicons. Further, a lexicon extension strategy is designed to address out-of lexicon problem by automatically learning semantic projections. Finally, we regard word-level entity type distribution features as an external language-independent knowledge and incorporate them into our neural architecture. Experiments on two low resource languages (Dutch and Spanish) demonstrate the effectiveness of these additional semantic representations (average 4.8\% improvement). Moreover, on Chinese OntoNotes 4.0 dataset, our approach achieves an F-score of 83.07\% with 2.91\% absolute gain compared to the state-of-the-art systems.}, language = {en}, urldate = {2025-01-01}, booktitle = {Proceedings of the {Twenty}-{Seventh} {International} {Joint} {Conference} on {Artificial} {Intelligence}}, publisher = {International Joint Conferences on Artificial Intelligence Organization}, author = {Feng, Xiaocheng and Feng, Xiachong and Qin, Bing and Feng, Zhangyin and Liu, Ting}, month = jul, year = {2018}, pages = {4071--4077}, }
@article{kim_survey_2018, title = {A survey on sentiment and emotion analysis for computational literary studies}, journal = {arXiv preprint arXiv:1808.03137}, author = {Kim, Evgeny and Klinger, Roman}, year = {2018}, }
@book{bengfort_applied_2018, address = {Boston}, title = {Applied {Text} {Analysis} with {Python}: {Enabling} {Language}-{Aware} {Data} {Products} with {Machine} {Learning}.}, shorttitle = {Applied {Text} {Analysis} with {Python}}, url = {https://pdfroom.com/books/applied-text-analysis-with-python-enabling-language-aware-data-products-with-machine-learning/ra5179J6gJO}, abstract = {Applied Text Analysis with Python: Enabling Language-Aware Data Products with Machine Learning}, language = {en}, urldate = {2023-12-31}, publisher = {O'Reilly}, author = {Bengfort, Benjamin and Ojeda, Tony and Bilbro, Rebecca}, year = {2018}, }
@article{eckhoff_proiel_2018, title = {The {PROIEL} treebank family: a standard for early attestations of {Indo}-{European} languages}, volume = {52}, doi = {10.1007/s10579-017-9388-5}, journal = {Lang Resources \& Evaluation}, author = {Eckhoff, H. and Bech, K. and Bouma, G.}, year = {2018}, pages = {29--65}, }
@article{ponti_non-configurationality_2018, title = {Non-{Configurationality} in {Diachrony} : {Correlations} in {Local} and {Global} {Networks} of {Ancient} {Greek} and {Latin}}, volume = {35}, doi = {10.1075/dia.00007.pon}, number = {3}, journal = {Diachronica : international journal for historical linguistics = revue internationale pour la linguistique historique = internationale Zeitschrift für historische Linguistik}, author = {Ponti, Edoardo Maria and Luraghi, Silvia}, year = {2018}, pages = {367--392}, }
@incollection{cecchini_challenges_2018, address = {Bruxelles, Belgium}, series = {Special {Interest} {Group} on linguistic {DATa} and corpus-based approaches to {NLP} ({SIGDAT}), {ACL}.}, title = {Challenges in {Converting} the {Index} {Thomisticus} {Treebank} into {Universal} {Dependencies}}, url = {https://www.aclweb.org/anthology/W18-6004.pdf}, booktitle = {Proceedings of the {Second} {Workshop} on {Universal} {Dependencies} ({UDW} 2018) at {EMNLP} 2018}, author = {Cecchini, F. M. and Passarotti, M. and Marongiu, P. and Zeman, D.}, year = {2018}, pages = {27--36}, }
@incollection{berti_annotating_2018, address = {Zürich/New York}, title = {Annotating {Text} {Reuse} within the {Context}: {The} {Leipzig} {Open} {Fragmentary} {Texts} {Series} ({LOFTS})}, language = {English}, booktitle = {Text, {Kontext}, {Kontextualisierung}. {Moderne} {Kontextkonzepte} und antike {Literatur}}, author = {Berti, Monica}, editor = {Forst, Alexandra and Gärtner, Ursula and Tischer, Ute}, year = {2018}, pages = {223--234}, }
@inproceedings{monachini_digital_2018, title = {Digital {Classics}: {A} {Survey} on the {Needs} of {Ancient} {Greek} {Scholars} in {Italy}}, url = {https://www.clarin.eu/sites/default/files/Monachini-Nicolosi-Stefanini-CLARIN2017_paper_3.pdf}, abstract = {This paper presents and discusses the findings of a survey carried out in order to assess the use of digital resources and digital technologies with respect to work in Ancient Greek scholarship, as well as to identify the factors that are likely to constrain its use and to elicit needs and requirements of Ancient Greek scholars in Italy. The survey is in line with the principles behind the recent user engagement strategy developed by CLARIN-ERIC and constitutes one of the national efforts undertaken by CLARIN-IT to contribute to the wider impact of CLARIN on Digital Classicists.}, language = {en}, booktitle = {Proceedings of the {CLARIN} 2017 {Conference}}, publisher = {Linköping University Electronic Press}, author = {Monachini, Monica and Nicolosi, Anika and Stefanini, Alberto}, year = {2018}, keywords = {reference}, pages = {1--5}, }
@phdthesis{nury_automated_2018, type = {phdthesis}, title = {Automated {Collation} and {Digital} {Editions}: from {Theory} to {Practice}}, shorttitle = {Automated {Collation} and {Digital} {Editions}}, url = {https://hal.science/tel-02493805}, abstract = {The purpose of the dissertation is to investigate from a theoretical and methodological perspective the different tools that allow automated collation, and study the application of such tools to the creation of a digital critical edition in the context of Classical literature. By doing so, the dissertation examines many foundational but often neglected components of the philological method, such as the definition and wider implication of transcription, reading, and variant. The goal is to provide a reflection on automated collation and the theoretical as well as practical challenges it poses: what is automated collation? How is it performed, and what are the main differences with manual collation? What are the benefits of automated collation? Why has it not been widely adopted yet, despite the fact that it was developed to help scholars? How to process the results of collation programmes? As a case study, a Classical Latin text has been used to test automated collation and to compare the various existing tools.}, language = {en}, urldate = {2023-08-26}, school = {King's College London}, author = {Nury, Elisa}, month = jul, year = {2018}, }
@inproceedings{lana_eliciting_2018, address = {Cham}, series = {Communications in {Computer} and {Information} {Science}}, title = {Eliciting the {Ancient} {Geography} from a {Digital} {Library} of {Latin} {Texts}}, isbn = {978-3-319-73165-0}, doi = {10.1007/978-3-319-73165-0_19}, abstract = {Geolat – Geography for Latin Literature is a research project, aimed at making accessible a digital library containing the works of Latin literature (from its origins in 240 BCE to the end of the Roman Empire in 476 CE) through a query interface of geographic/cartographic type representing the geographic knowledge expressed in the Latin texts themselves. A core activity of the project has been the development of the ontology GO!, which describes the geographical knowledge contained in the texts of the library. The ontologically annotated texts will allow for a variety of scientifically relevant uses, apart from the geo-based browsing: for example the production of digital and printed critical editions. The project is under development at Dipartimento di Studi Umanistici of Università del Piemonte Orientale, and financially supported by Fondazione Compagnia di San Paolo.}, language = {en}, booktitle = {Digital {Libraries} and {Multimedia} {Archives}}, publisher = {Springer International Publishing}, author = {Lana, Maurizio and Tambassi, Timothy}, editor = {Serra, Giuseppe and Tasso, Carlo}, year = {2018}, keywords = {Classical latin texts, Digital library, Geography, OWL, Ontology, Web}, pages = {191--200}, }
@article{zhang_did_2018, title = {Did {Gaius} {Julius} {Caesar} {Write} {De} {Bello} {Hispaniensi}? {A} {Computational} {Study} of {Latin} {Classics} {Authorship}}, volume = {14}, copyright = {Copyright (c) 2018 Human IT: Journal for Information Technology Studies as a Human Science}, issn = {1402-151X}, shorttitle = {Did {Gaius} {Julius} {Caesar} {Write} {De} {Bello} {Hispaniensi}?}, url = {https://humanit.hb.se/article/view/515}, abstract = {This project addresses a two-millennium old mystery surrounding the authorship of ancient Latin war memoirs attributed to Caesar, using Distributional Semantics, a modern computational method for detecting written text patterns. The Civil War has been confirmed to be Caesar’s work, as well as the first seven of the eight chapters of the Gallic War, the eighth by Hirtius. The authorship of the African, Alexandrine, and Spanish Wars, though attributed to Caesar, is still under debate. Methods of distributional semantics derive representations of words from their distribution across a large amount of text, such that words that occur in similar contexts have similar representations. These representations can then be combined to model larger units of text, such as chapters and whole books. SemanticVectors software was used to calculate the similarity between chapters or books after dimension reduction using Random Indexing. The results show that the Gallic War’s eighth chapter is significantly different from its other seven chapters and from the Civil War, verifying the ability of distributional semantics to detect different Latin authorships. The African, Alexandrine, and Spanish Wars are notably different from the Civil War and Gallic War (first seven chapters), suggesting that Caesar did not write these three. Furthermore, the African, Alexandrine, and Spanish Wars are different from each other and from the Civil and Gallic Wars, suggesting that they were written by different authors. 
This project demonstrates the value of distributional semantics in classics research. Its implications for digital humanities and real world problems such as plagiarism are discussed.}, language = {en}, number = {1}, urldate = {2023-08-26}, journal = {Human IT: Journal for Information Technology Studies as a Human Science}, author = {Zhang, Olivia R. and Cohen, Trevor and McGill, Scott}, month = jun, year = {2018}, note = {Number: 1}, keywords = {Caesar, Classics, Latin, authorship attribution, computational linguistics, distributional semantics}, pages = {28--58}, }
@article{vierros_preprocessing_2017, title = {Preprocessing {Greek} {Papyri} for {Linguistic} {Annotation}}, doi = {10.46298/jdmdh.1385}, number = {Numéro spécial sur le traitement assisté par ordinateur de l‘intertextualité dans les langues anciennes}, journal = {Journal of Data Mining \& Digital Humanities}, author = {Vierros, Marja and Henriksson, Erik}, year = {2017}, }
@book{mistrik_software_2017, title = {Software {Architecture} for {Big} {Data} and the {Cloud}}, isbn = {978-0-12-809338-2}, url = {https://books.google.de/books?hl=de&lr=&id=zvPtDQAAQBAJ}, abstract = {Software Architecture for Big Data and the Cloud is designed to be a single resource that brings together research on how software architectures can solve the challenges imposed by building big data software systems. The challenges of big data on the software architecture can relate to scale, security, integrity, performance, concurrency, parallelism, and dependability, amongst others. Big data handling requires rethinking architectural solutions to meet functional and non-functional requirements related to volume, variety and velocity. The book's editors have varied and complementary backgrounds in requirements and architecture, specifically in software architectures for cloud and big data, as well as expertise in software engineering for cloud and big data. This book brings together work across different disciplines in software engineering, including work expanded from conference tracks and workshops led by the editors. Discusses systematic and disciplined approaches to building software architectures for cloud and big data with state-of-the-art methods and techniques Presents case studies involving enterprise, business, and government service deployment of big data applications Shares guidance on theory, frameworks, methodologies, and architecture for cloud and big data}, language = {en}, publisher = {Morgan Kaufmann}, author = {Mistrik, Ivan and Bahsoon, Rami and Ali, Nour and Heisel, Maritta and Maxim, Bruce}, month = jun, year = {2017}, note = {Google-Books-ID: zvPtDQAAQBAJ}, keywords = {Computers / Software Development \& Engineering / General}, }
@article{bodard_standards_2017, title = {Standards for {Networking} {Ancient} {Person} data: {Digital} approaches to problems in prosopographical space}, copyright = {Copyright (c) 2017}, issn = {2364-7957}, shorttitle = {Standards for {Networking} {Ancient} {Person} data}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/37975}, doi = {10.11588/dco.2017.0.37975}, abstract = {Prosopographies disambiguate names appearing in sources by creating lists of persons, but the progress of scholarship now makes these lists difficult to maintain. In a digital context unique stable identifiers can be reshuffled ad libitum when searching and ordering information. Digital data increasingly brings together complementary research outputs: the Standards for Networking Ancient Prosopographies project takes on the challenge of creating an aggregated resource, adopting a Linked Open Data approach. In this paper we shall present three case studies highlighting the promise and problems of encoding unambiguous identities, titulature and other disambiguating information, and treating divine figures as person-data, respectively. Digital approaches are tools for research, assisting rather than replacing the historian, who remains central to the research endeavor.}, language = {en}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Bodard, Gabriel and Cayless, Hugh and Depauw, Mark and Isaksen, Leif and Lawrence, Faith and Rahtz, Sebastian}, month = nov, year = {2017}, pages = {28--43}, }
@article{pockelmann_paraphrasensuche_2017, title = {Paraphrasensuche mittels word2vec und der {Word} {Mover}’s {Distance} im {Altgriechischen}}, copyright = {Copyright (c) 2017 Digital Classics Online}, issn = {2364-7957}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/40185}, doi = {10.11588/dco.2017.0.40185}, abstract = {To find receptions of Plato‘s work within the ancient Greek literature, automatic methods would be a useful assistance. Unfortunately, such methods are often knowledge-based and thus restricted to extensively annotated texts, which are not available to a sufficient extent for ancient Greek. In this article, we describe an approach that is based on the distributional hypotheses instead, to overcome the problem of missing annotations. This approach uses word2vec and the related Word Mover‘s Distance to determine phrases with similar meaning. Despite its experimental state, the method produces some meaningful results as shown in three examples.}, language = {de}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Pöckelmann, Marcus and Ritter, Jörg and Wöckener-Gade, Eva and Schubert, Charlotte}, month = dec, year = {2017}, keywords = {Word Mover’s Distance}, pages = {24--36}, }
@article{chaudhuri_bioinformatics_2017, title = {Bioinformatics and {Classical} {Literary} {Study}}, volume = {Numéro spécial sur le traitement assisté par ordinateur de l‘intertextualité dans les langues anciennes}, issn = {2416-5999}, url = {https://jdmdh.episciences.org/3807}, doi = {10.46298/jdmdh.1386}, abstract = {This paper describes the Quantitative Criticism Lab, a collaborative initiative between classicists, quantitative biologists, and computer scientists to apply ideas and methods drawn from the sciences to the study of literature. A core goal of the project is the use of computational biology, natural language processing, and machine learning techniques to investigate authorial style, intertextuality, and related phenomena of literary significance. As a case study in our approach, here we review the use of sequence alignment, a common technique in genomics and computational linguistics, to detect intertextuality in Latin literature. Sequence alignment is distinguished by its ability to find inexact verbal similarities, which makes it ideal for identifying phonetic echoes in large corpora of Latin texts. Although especially suited to Latin, sequence alignment in principle can be extended to many other languages.}, number = {Project presentations}, urldate = {2023-08-26}, journal = {Journal of Data Mining \& Digital Humanities}, author = {Chaudhuri, Pramit and Dexter, Joseph P.}, month = aug, year = {2017}, note = {Publisher: Episciences.org}, }
@article{dexter_quantitative_2017, title = {Quantitative criticism of literary relationships}, volume = {114}, url = {https://www.pnas.org/doi/abs/10.1073/pnas.1611910114}, doi = {10.1073/pnas.1611910114}, abstract = {Authors often convey meaning by referring to or imitating prior works of literature, a process that creates complex networks of literary relationships (“intertextuality”) and contributes to cultural evolution. In this paper, we use techniques from stylometry and machine learning to address subjective literary critical questions about Latin literature, a corpus marked by an extraordinary concentration of intertextuality. Our work, which we term “quantitative criticism,” focuses on case studies involving two influential Roman authors, the playwright Seneca and the historian Livy. We find that four plays related to but distinct from Seneca’s main writings are differentiated from the rest of the corpus by subtle but important stylistic features. We offer literary interpretations of the significance of these anomalies, providing quantitative data in support of hypotheses about the use of unusual formal features and the interplay between sound and meaning. The second part of the paper describes a machine-learning approach to the identification and analysis of citational material that Livy loosely appropriated from earlier sources. We extend our approach to map the stylistic topography of Latin prose, identifying the writings of Caesar and his near-contemporary Livy as an inflection point in the development of Latin prose style. In total, our results reflect the integration of computational and humanistic methods to investigate a diverse range of literary questions.}, number = {16}, urldate = {2023-08-26}, journal = {Proceedings of the National Academy of Sciences}, author = {Dexter, Joseph P. and Katz, Theodore and Tripuraneni, Nilesh and Dasgupta, Tathagata and Kannan, Ajay and Brofos, James A. and Bonilla Lopez, Jorge A. and Schroeder, Lea A. 
and Casarez, Adriana and Rabinovich, Maxim and Haimson Lushkov, Ayelet and Chaudhuri, Pramit}, month = apr, year = {2017}, note = {Publisher: Proceedings of the National Academy of Sciences}, pages = {E3195--E3204}, }
@article{almas_perseids_2017, title = {Perseids: {Experimenting} with {Infrastructure} for {Creating} and {Sharing} {Research} {Data} in the {Digital} {Humanities}}, volume = {16}, number = {19}, journal = {Data Science Journal}, author = {Almas, B.}, year = {2017}, pages = {1--17}, }
@inproceedings{torres_aguilar_named_2016, address = {Krakow}, title = {Named entity recognition applied on a data base of {Medieval} {Latin} charters. {The} case of chartae burgundiae.}, volume = {1632}, isbn = {1613-0073}, url = {https://ceur-ws.org/Vol-1632/paper_9.pdf}, abstract = {The work on the named entity recognition (NER) in databases of historical texts has been placed among the most promising new ways to implement best recovery and managements tools for exploring mass data. In this paper, we describe the application processing NER through a modelling with CRF on an annotated database of Burgundy collection of charters from the tenth to thirteenth centuries. The aim is to generate a model for automatic recognition of named entities in historical sources. We discuss the nature of historical documents in the corpus and extraction of rules, and we expose adaptation to the processing algorithm and the most common problems encountered in Medio Latin texts using diplomatic formularies, which is an atypical case within the NER studies.}, language = {en}, booktitle = {Proceedings of the 3rd {HistoInformatics} {Workshop}}, author = {Torres Aguilar, Sergio Octavio and Tannier, Xavier and Chastang, Pierre}, year = {2016}, pages = {67--71}, }
@inproceedings{erdmann_challenges_2016, address = {Osaka, Japan}, title = {Challenges and {Solutions} for {Latin} {Named} {Entity} {Recognition}}, abstract = {Although spanning thousands of years and genres as diverse as liturgy, historiography, lyric and other forms of prose and poetry, the body of Latin texts is still relatively sparse compared to English. Data sparsity in Latin presents a number of challenges for traditional Named Entity Recognition techniques. Solving such challenges and enabling reliable Named Entity Recognition in Latin texts can facilitate many down-stream applications, from machine translation to digital historiography, enabling Classicists, historians, and archaeologists for instance, to track the relationships of historical persons, places, and groups on a large scale. This paper presents the first annotated corpus for evaluating Named Entity Recognition in Latin, as well as a fully supervised model that achieves over 90\% F-score on a held-out test set, significantly outperforming a competitive baseline. We also present a novel active learning strategy that predicts how many and which sentences need to be annotated for named entities in order to attain a specified degree of accuracy when recognizing named entities automatically in a given text. This maximizes the productivity of annotators while simultaneously controlling quality.}, booktitle = {Proceedings of the {Workshop} on {Language} {Technology} {Resources} and {Tools} for {Digital} {Humanities} ({LT4DH})}, publisher = {The COLING 2016 Organizing Committee.}, author = {Erdmann, Alexander and Brown, Christopher and Joseph, Brian and Janse, Mark and Ajaka, Petra and Elsner, Micha and De Marneffe, Marie-Catherine}, year = {2016}, pages = {85--93}, }
@article{springmann_latmor_2016, title = {{LatMor} : {A} {Latin} {Finite}-{State} {Morphology} {Encoding} {Vowel} {Quantity}}, volume = {2}, doi = {10.1515/opli-2016-0019}, number = {1}, journal = {Open linguistics}, author = {Springmann, Uwe and Schmid, Helmut and Najock, Dietmar}, year = {2016}, pages = {386--392}, }
@book{korkiakangas_subject_2016, address = {Helsinki}, series = {Commentationes {Humanarum} {Litterarum}}, title = {Subject {Case} in the {Latin} of {Tuscan} {Charters} of the 8th and 9th {Centuries}.}, volume = {133}, publisher = {Societas Scientiarum Fennica}, author = {Korkiakangas, Timo}, year = {2016}, }
@incollection{mahony_open_2016, title = {Open {Education} and {Open} {Educational} {Resources} for the {Teaching} of {Classics} in the {UK}}, url = {https://www.ubiquitypress.com/site/books/10.5334/bat/}, abstract = {Edited by organizers of “Digital Classicist” seminars in London and Berlin, this volume addresses the impact of computational approaches to the study of antiquity on audiences other than the scholars who conventionally publish it. In addition to colleagues in classics and digital humanities, the eleven chapters herein concern and are addressed to students, heritage professionals and “citizen scientists”.{\textless}br{\textgreater}{\textless}br{\textgreater}Each chapter is a scholarly contribution, presenting research questions in the classics, digital humanities or, in many cases, both. They are all also examples of work within one of the most important areas of academia today: scholarly research and outputs that engage with collaborators and audiences not only including our colleagues, but also students, academics in different fields including the hard sciences, professionals and the broader public. Collaboration and scholarly interaction, particularly with better-funded and more technically advanced disciplines, is essential to digital humanities and perhaps even more so to digital classics. The international perspectives on these issues are especially valuable in an increasingly connected, institutionally and administratively diverse world.{\textless}br{\textgreater}{\textless}br{\textgreater}This book addresses the broad range of issues scholars and practitioners face in engaging with students, professionals and the public, in accessible and valuable chapters from authors of many backgrounds and areas of expertise, including language and linguistics, history, archaeology and architecture. 
This collection will be of interest to teachers, scientists, cultural heritage professionals, linguists and enthusiasts of history and antiquity.}, language = {en}, urldate = {2020-02-04}, booktitle = {Digital {Classics} {Outside} the {Echo}-{Chamber}}, publisher = {Ubiquity Press}, author = {Mahony, Simon}, editor = {Romanello, Matteo and Bodard, Gabriel}, month = apr, year = {2016}, doi = {10.5334/bat}, keywords = {reference}, pages = {33--50}, }
@inproceedings{nivre_universal_2016, title = {Universal dependencies v1: {A} multilingual treebank collection}, url = {https://www.aclweb.org/anthology/L16-1262.pdf}, author = {Nivre, Joakim and De Marneffe, Marie-Catherine and Ginter, Filip and Goldberg, Yoav and Hajic, Jan and Manning, Christopher D and McDonald, Ryan and Petrov, Slav and Pyysalo, Sampo and Silveira, Natalia}, year = {2016}, pages = {1659--1666}, }
@article{kirby_computational_2016, title = {A {Computational} {Method} for {Comparative} {Greek} and {Latin} {Prosimetrics}}, language = {en}, author = {Kirby, Joseph Tyler}, year = {2016}, }
@article{stover_reassessing_2016, title = {{REASSESSING} {THE} {APULEIAN} {CORPUS}: {A} {COMPUTATIONAL} {APPROACH} {TO} {AUTHENTICITY}}, volume = {66}, issn = {0009-8388, 1471-6844}, shorttitle = {{REASSESSING} {THE} {APULEIAN} {CORPUS}}, url = {https://www.cambridge.org/core/journals/classical-quarterly/article/abs/reassessing-the-apuleian-corpus-a-computational-approach-to-authenticity/6BFD216C8AC552673B16BBA0FA40FF54}, doi = {10.1017/S0009838816000768}, abstract = {The renaissance of Apuleian studies of the past few decades shows no signs of abating.1 The summer of 2014 may well be the highest watermark yet recorded in the tide of interest in Apuleius: June and July alone saw the release of two monographs, one each from Oxford University Press and Cambridge University Press, and one edited conference volume, from Routledge.2 The clearest sign that the sophist of Madauros has come into his own is his admission into the exclusive club of the Oxford Classical Texts: the first volume of his complete works containing the Metamorphoses edited by Maaike Zimmerman came out in 2012. One of the most salutary effects of this renewed interest has been the reappraisal of the ‘whole Apuleius’: Apuleius has more to offer than just the Metamorphoses, and recent scholarship on the rhetorica and the philosophica have shown not only how these opera minora can help us understand the opus maius, but also how they are important and interesting documents in their own right.3}, language = {en}, number = {2}, urldate = {2023-08-26}, journal = {The Classical Quarterly}, author = {Stover, Justin and Kestemont, Mike}, month = dec, year = {2016}, note = {Publisher: Cambridge University Press}, pages = {645--672}, }
@inproceedings{elsner_automatic_2016, address = {Berlin, Germany}, title = {Automatic discovery of {Latin} syntactic changes}, url = {http://aclweb.org/anthology/W16-2120}, doi = {10.18653/v1/W16-2120}, abstract = {Syntactic change tends to affect constructions, but treebanks annotate lower-level structure: PCFG rules or dependency arcs. This paper extends prior work in native language identification, using Tree Substitution Grammars to discover constructions which can be tested for historical variability. In a case study comparing Classical and Medieval Latin, the system discovers several constructions corresponding to known historical differences, and learns to distinguish the two varieties with high accuracy. Applied to an intermediate text (the Vulgate Bible), it indicates which changes between the eras were already occurring at this earlier stage.}, language = {en}, urldate = {2023-08-26}, booktitle = {Proceedings of the 10th {SIGHUM} {Workshop} on {Language} {Technology} for {Cultural} {Heritage}, {Social} {Sciences}, and {Humanities}}, publisher = {Association for Computational Linguistics}, author = {Elsner, Micha and Lane, Emily}, year = {2016}, pages = {156--164}, }
@article{field_automated_2016, title = {An {Automated} {Approach} to {Syntax}-based {Analysis} of {Classical} {Latin}}, copyright = {Copyright (c) 2016 Anjalie Field}, issn = {2364-7957}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/32315}, doi = {10.11588/dco.2016.0.32315}, abstract = {The goal of this study is to present an automated method for analyzing the style of Latin authors. Many of the common automated methods in stylistic analysis are based on lexical measures, which do not work well with Latin because of the language’s high degree of inflection and free word order. In contrast, this study focuses on analysis at a syntax level by examining two constructions, the ablative absolute and the cum clause. These constructions are often interchangeable, which suggests an author’s choice of construction is typically more stylistic than functional. We first identified these constructions in hand-annotated texts. Next we developed a method for identifying the constructions in unannotated texts, using probabilistic morphological tagging. Our methods identified constructions with enough accuracy to distinguish among different genres and different authors. In particular, we were able to determine which book of Caesar’s Commentarii de Bello Gallico was not written by Caesar. Furthermore, the usage of ablative absolutes and cum clauses observed in this study is consistent with the usage scholars have observed when analyzing these texts by hand. The proposed methods for an automatic syntax-based analysis are shown to be valuable for the study of classical literature.}, language = {en}, urldate = {2023-08-26}, journal = {Digital Classics Online}, author = {Field, Anjalie}, month = dec, year = {2016}, pages = {57--78}, }
@article{kestemont_authenticating_2016, title = {Authenticating the writings of {Julius} {Caesar}}, volume = {63}, issn = {09574174}, url = {https://linkinghub.elsevier.com/retrieve/pii/S0957417416303116}, doi = {10.1016/j.eswa.2016.06.029}, language = {en}, urldate = {2023-07-23}, journal = {Expert Systems with Applications}, author = {Kestemont, Mike and Stover, Justin and Koppel, Moshe and Karsdorp, Folgert and Daelemans, Walter}, month = nov, year = {2016}, pages = {86--96}, }
@book{speich_algorithms_2016, address = {Düsseldorf}, title = {Algorithms and {Aristotle}}, isbn = {978-3-945627-13-6}, abstract = {A glossary of key terms on digitalization and its effects on the labour market and education can be found in alphabetical order throughout the book}, publisher = {Vodafone Foundation Germany}, editor = {Speich, M.}, year = {2016}, }
@article{stover_authorship_2016, title = {The {Authorship} of the "{Historia} {Augusta}": {Two} {New} {Computational} {Studies}}, volume = {59}, issn = {0076-0730}, shorttitle = {The {Authorship} of the "{Historia} {Augusta}"}, url = {https://www.jstor.org/stable/44254158}, abstract = {The case of the Historia Augusta, a collection of imperial biographies from Hadrian to Carus supposedly written by six different authors, provided the impetus for the introduction of computational methods into the Echtheitskritik of ancient authors in 1979. After a flurry of studies in the 1990s, interest waned, particularly because most of those studies seemed to support conclusions incompatible with the scholarly consensus on the question. In the paper, we approach this question with the new tool of authorship verification – one of the most promising approaches in forensic stylometry today – as well as the established method of principal components analysis to demonstrate that there is no simple alternative between single and multiple authorship, and that the results of a computational analysis are in fact compatible with the results obtained from historical, literary, and philological analysis.}, number = {2}, urldate = {2023-07-23}, journal = {Bulletin of the Institute of Classical Studies}, author = {Stover, Justin A. and Kestemont, Mike}, year = {2016}, note = {Publisher: Wiley}, pages = {140--157}, }
@inproceedings{straka_udpipe_2016, title = {{UDPipe}: {Trainable} {Pipeline} for {Processing} {CoNLL}-{U} {Files} {Performing} {Tokenization}, {Morphological} {Analysis}, {POS} {Tagging} and {Parsing}}, booktitle = {{LREC}}, author = {Straka, Milan and Hajic, Jan and Straková, Jana}, year = {2016}, pages = {4290--4297}, }
@article{schubert_close_2015, title = {Close {Reading} und {Distant} {Reading}. {Methoden} der {Altertumswissenschaften} in der {Gegenwart}}, copyright = {Copyright (c) 2015 Charlotte Schubert}, issn = {2364-7957}, url = {https://journals.ub.uni-heidelberg.de/index.php/dco/article/view/20483}, doi = {10.11588/dco.2015.1.20483}, abstract = {\_}, language = {de}, urldate = {2025-02-02}, journal = {Digital Classics Online}, author = {Schubert, Charlotte}, month = may, year = {2015}, keywords = {Visualisierung}, pages = {1--6}, }
@inproceedings{broux_developing_2015, address = {Cham}, title = {Developing {Onomastic} {Gazetteers} and {Prosopographies} for the {Ancient} {World} {Through} {Named} {Entity} {Recognition} and {Graph} {Visualization}: {Some} {Examples} from {Trismegistos} {People}}, isbn = {978-3-319-15168-7}, shorttitle = {Developing {Onomastic} {Gazetteers} and {Prosopographies} for the {Ancient} {World} {Through} {Named} {Entity} {Recognition} and {Graph} {Visualization}}, doi = {10.1007/978-3-319-15168-7_38}, abstract = {Developing prosopographies or onomastic lists in a non-digital environment used to be a painstaking and time-consuming exercise, involving manual labour by teams of researchers, often taking decades. For some scholarly disciplines from the ancient world this is still true, especially those studying non-alphabetical writing systems that lack a uniform transcription system, e.g. Demotic. But for many others, such as Greek and Latin, digital full text corpora in Unicode are now available, often even freely accessible. In this paper we illustrate, on the basis of Trismegistos, how data collection through Named Entity Recognition and visualization through Social Network Analysis have huge potential to speed up the creation of onomastic lists and the development of prosopographies.}, language = {en}, booktitle = {Social {Informatics}}, publisher = {Springer International Publishing}, author = {Broux, Yanne and Depauw, Mark}, editor = {Aiello, Luca Maria and McFarland, Daniel}, year = {2015}, pages = {304--313}, }
@article{eckhoff_linguistics_2015, title = {Linguistics vs. digital editions: {The} {Tromsø} {Old} {Russian} and {OCS} {Treebank}}, url = {https://munin.uit.no/bitstream/handle/10037/22366/article.pdf?sequence=2&isAllowed=y}, journal = {Scripta \& e-Scripta}, author = {Eckhoff, H. M. and Berdicevskis, A.}, year = {2015}, pages = {9--25}, }
@article{fusi_multilanguage_2015, title = {A {Multilanguage}, {Modular} {Framework} for {Metrical} {Analysis} : {It} {Patterns} and {Theorical} {Issues}}, volume = {199}, doi = {10.3917/lang.199.0041}, journal = {Langages}, author = {Fusi, Daniele}, year = {2015}, pages = {41--66}, }
@article{de_felice_classes_2015, title = {{CLaSSES} : {A} {New} {Digital} {Resource} for {Latin} {Epigraphy}}, volume = {1}, doi = {10.1418/90427}, number = {1}, journal = {Italian journal of computational linguistics}, author = {De Felice, Irene and Donati, Margherita and Marotta, Giovanna}, year = {2015}, pages = {125--136}, }
@article{mcgillivray_computational_2015, title = {Computational {Valency} {Lexica} for {Latin} and {Greek} in {Use} : {A} {Case} {Study} of {Syntactic} {Ambiguity}}, volume = {14}, url = {https://doi.org/10.1515/joll-2015-0005}, doi = {10.1515/joll-2015-0005}, number = {1}, journal = {Journal of Latin Linguistics}, author = {McGillivray, Barbara and Vatri, Alessandro}, year = {2015}, pages = {101--126}, }
@inproceedings{bjerva_word_2015, address = {Beijing, China}, title = {Word {Embeddings} {Pointing} the {Way} for {Late} {Antiquity}}, url = {https://www.aclweb.org/anthology/W15-3708}, doi = {10.18653/v1/W15-3708}, urldate = {2021-04-24}, booktitle = {Proceedings of the 9th {SIGHUM} {Workshop} on {Language} {Technology} for {Cultural} {Heritage}, {Social} {Sciences}, and {Humanities} ({LaTeCH})}, publisher = {Association for Computational Linguistics}, author = {Bjerva, Johannes and Praet, Raf}, month = jul, year = {2015}, pages = {53--57}, }
@book{richards_software_2015, title = {Software {Architecture} {Patterns}}, url = {http://103.62.146.201:8081/xmlui/handle/1/5665}, abstract = {It’s all too common for developers to start coding an application without a formal architecture in place. Without a clear and well- defined architecture, most developers and architects will resort to}, language = {en}, urldate = {2023-08-31}, author = {Richards, Mark}, year = {2015}, note = {Accepted: 2018-01-19T11:08:40Z}, }
@article{simon_linking_2015, title = {Linking early geospatial documents, one place at a time: annotation of geographic documents with {Recogito}}, volume = {10}, issn = {1790-3769}, shorttitle = {Linking early geospatial documents, one place at a time}, url = {http://www.e-perimetron.org/Vol_10_2/Simon_et_al.pdf}, abstract = {Recogito is an open source tool for the semi-automatic annotation of place references in maps and texts. It was developed as part of the Pelagios 3 research project, which aims to build up a comprehensive directory of places referred to in early maps and geographic writing predating the year 1492. Pelagios 3 focuses specifically on sources from the Classical Latin, Greek and Byzantine periods; on Mappae Mundi and narrative texts from the European Medieval period; on Late Medieval Portolans; and on maps and texts from the early Islamic and early Chinese traditions. Since the start of the project in September 2013, the team has harvested more than 120,000 toponyms, manually verifying almost 60,000 of them. Furthermore, the team held two public annotation workshops supported through the Open Humanities Awards 2014. In these workshops, a mixed audience of students and academics of different backgrounds used Recogito to add several thousand contributions on each workshop day. A number of benefits arise out of this work: on the one hand, the digital identification of places – and the names used for them – makes the documents' contents amenable to information retrieval technology, i.e. documents become more easily search- and discoverable to users than through conventional metadata-based search alone. On the other hand, the documents are opened up to new forms of re-use. For example, it becomes possible to “map” and compare the narrative of texts, and the contents of maps with modern day tools like Web maps and GIS; or to analyze and contrast documents’ geographic properties, toponymy and spatial relationships. 
Seen in a wider context, we argue that initiatives such as ours contribute to the growing ecosystem of the “Graph of Humanities Data” that is gathering pace in the Digital Humanities (linking data about people, places, events, canonical references, etc.), which has the potential to open up new avenues for computational and quantitative research in a variety of fields including History, Geography, Archaeology, Classics, Genealogy and Modern Languages.}, language = {en}, number = {2}, urldate = {2023-08-26}, journal = {e-Perimetron}, author = {Simon, Rainer and Barker, Elton and Isaksen, Leif and de Soto Cañamares, Pau}, year = {2015}, note = {Number: 2}, pages = {49--59}, }
@article{revellio_classics_2015, title = {Classics and the {Digital} {Age} {Advantages} and limitations of digital text analysis in classical philology}, issn = {2364-5342}, url = {https://kops.uni-konstanz.de/server/api/core/bitstreams/620defc4-effd-4224-bfb6-782e20748e01/content}, abstract = {Die Klassische Philologie nahm computergestützte Methoden der Textanalyse früh als Chance wahr. Um einen Einblick in die jüngsten Entwicklungen der digitalen Textanalyse im Bereich der Latinistik zu geben wird eine Auswahl bestehender Textdatenbanken wie gängiger Analysetools vorgestellt, wobei insbesondere auf das Phänomen der Intertextualität als Untersuchungsfeld fokussiert wird. Zudem werden unmittelbar verknüpfte Themen wie die Digitalisierung und langfristige Erhaltung antiker Texte, der Status unterschiedlicher Text-Surrogate sowie die Notwendigkeit fremdsprachlicher Kenntnisse diskutiert.}, urldate = {2023-08-26}, author = {Revellio, Marie}, year = {2015}, pages = {1--16}, }
@book{blackwell_canonical_2015, title = {The {Canonical} {Text} {Services} {URN} {Specification}, {Version} 2.0.{Rc}.1 [{CITE} / {URN}]}, author = {Blackwell, Christopher and Smith, Neel}, year = {2015}, }
@article{coffee_tesserae_2014, title = {Tesserae: {A} {Search} {Engine} for {Allusion}}, shorttitle = {Tesserae}, url = {https://hcommons.org/deposits/item/hc:12221/}, abstract = {The Tesserae Project is an interdisciplinary research effort employing computational methods to detect and analyze literary allusion (a form of text reuse) currently focusing on Latin and ancient Greek. The Project seeks funding to create a fully-functional, publicly available tool to detect similar phrases in two texts at rates that approach those of literary commentators. To this end, funding will support adding sensitivity to word meaning, phrase context, and sound similarity. Detection rate improvements will be measured against a set of 3000 parallel phrases previously graded for literary significance. A revised website will inform researchers of research results and new functions of the tool. The project team will give presentations and produce publications explaining the function, results, and theoretical consequences of the fully operational tool. This work is preliminary to an out-year Implementation Phase that will see the addition of English, French, Italian, and Spanish.}, language = {en-US}, urldate = {2023-08-26}, author = {Coffee, Neil and Koenig, Jean-Pierre}, month = may, year = {2014}, }
@inproceedings{passarotti_syntax_2014, address = {Gothenburg, Sweden}, title = {From {Syntax} to {Semantics}. {First} {Steps} {Towards} {Tectogrammatical} {Annotation} of {Latin}}, url = {http://aclweb.org/anthology/W14-0615}, doi = {10.3115/v1/W14-0615}, abstract = {Assuming that collaboration between theoretical and computational linguistics is essential in projects aimed at developing language resources like annotated corpora, this paper presents the first steps of the semantic annotation of the Index Thomisticus Treebank, a dependency-based treebank of Medieval Latin. The semantic layer of annotation of the treebank is detailed and the theoretical framework supporting the annotation style is explained and motivated.}, language = {en}, urldate = {2023-08-26}, booktitle = {Proceedings of the 8th {Workshop} on {Language} {Technology} for {Cultural} {Heritage}, {Social} {Sciences}, and {Humanities} ({LaTeCH})}, publisher = {Association for Computational Linguistics}, author = {Passarotti, Marco}, year = {2014}, pages = {100--109}, }
@article{almas_linked_2014, title = {Linked {Data} in the {Perseus} {Digital} {Library}}, volume = {7}, url = {http://dlib.nyu.edu/awdl/isaw/isaw-papers/7/almas-babeu-krohn/}, number = {3}, journal = {ISAW Papers}, author = {Almas, B. and Babeu, A. and Krohn, A.}, year = {2014}, }
@article{koppel_determining_2014, title = {Determining if two documents are written by the same author}, volume = {65}, issn = {2330-1643}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.22954}, doi = {10.1002/asi.22954}, abstract = {Almost any conceivable authorship attribution problem can be reduced to one fundamental problem: whether a pair of (possibly short) documents were written by the same author. In this article, we offer an (almost) unsupervised method for solving this problem with surprisingly high accuracy. The main idea is to use repeated feature subsampling methods to determine if one document of the pair allows us to select the other from among a background set of “impostors” in a sufficiently robust manner.}, language = {en}, number = {1}, urldate = {2023-07-23}, journal = {Journal of the Association for Information Science and Technology}, author = {Koppel, Moshe and Winter, Yaron}, year = {2014}, note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/asi.22954}, keywords = {automatic classification, machine learning, natural language processing}, pages = {178--187}, }
@incollection{tiepmar_new_2014, title = {A new implementation for canonical text services [{CTS}]}, isbn = {978-1-63266-401-3}, booktitle = {Proceedings of the 8th {Workshop} on {Language} {Technology} for {Cultural} {Heritage}, {Social} {Sciences}, and {Humanities} ({LaTeCH})}, author = {Tiepmar, J. and Teichmann, C. and Heyer, G. and Berti, M. and Crane, G.}, year = {2014}, pages = {1--8}, }
@article{eckhoff_mapping_2013, title = {Mapping out the {Source} {Domain} : {Evidence} from {Parallel} {Old} {Indo}-{European} {Data}}, volume = {37}, doi = {10.1075/sl.37.2.03eck}, number = {2}, journal = {Studies in language}, author = {Eckhoff, Hanne Martine and Thomason, Olga A. and de Swart, Peter}, year = {2013}, pages = {302--355}, }
@book{moretti_distant_2013, title = {Distant {Reading}}, isbn = {978-1-78168-333-0}, url = {https://books.google.de/books?hl=en&lr=&id=Wo4110IdRLMC}, abstract = {WINNER OF THE NATIONAL BOOK CRITICS CIRCLE AWARD How does a literary historian end up thinking in terms of z-scores, principal component analysis, and clustering coefficients? The essays in Distant Reading led to a new and often contested paradigm of literary analysis. In presenting them here Franco Moretti reconstructs his intellectual trajectory, the theoretical influences over his work, and explores the polemics that have often developed around his positions.From the evolutionary model of “Modern European Literature,” through the geo-cultural insights of “Conjectures of World Literature” and “Planet Hollywood,” to the quantitative findings of “Style, inc.” and the abstract patterns of “Network Theory, Plot Analysis,” the book follows two decades of conceptual development, organizing them around the metaphor of “distant reading,” that has come to define—well beyond the wildest expectations of its author—a growing field of unorthodox literary studies.}, language = {en}, publisher = {Verso Books}, author = {Moretti, Franco}, month = jun, year = {2013}, note = {Google-Books-ID: Wo4110IdRLMC}, keywords = {Literary Collections / Essays, Literary Criticism / Comparative Literature, Literary Criticism / Semiotics \& Theory, reference}, }
@article{mikolov_efficient_2013, title = {Efficient estimation of word representations in vector space}, shorttitle = {{Word2Vec}}, url = {https://arxiv.org/pdf/1301.3781.pdf}, journal = {arXiv preprint arXiv:1301.3781}, author = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey}, year = {2013}, keywords = {reference}, pages = {1--12}, }
@book{schmidt_pattern-oriented_2013, title = {Pattern-oriented software architecture, patterns for concurrent and networked objects}, volume = {2}, isbn = {1-118-72517-4}, url = {https://www.researchgate.net/profile/Michael-Stal/publication/215835789_Pattern-Oriented_Software_Architecture_Patterns_for_Concurrent_and_Networked_Objects_Volume_2/links/53fd98440cf2dca8000356c5/Pattern-Oriented-Software-Architecture-Patterns-for-Concurrent-and-Networked-Objects-Volume-2.pdf}, publisher = {John Wiley \& Sons}, author = {Schmidt, Douglas C and Stal, Michael and Rohnert, Hans and Buschmann, Frank}, year = {2013}, }
@article{brown_software_2013, title = {Software architecture for developers}, url = {http://static.codingthearchitecture.com/sddconf2014-software-architecture-for-developers-extract.pdf}, journal = {Coding the Architecture}, author = {Brown, Simon}, year = {2013}, }
@article{andrews_beyond_2013, title = {Beyond the tree of texts: {Building} an empirical model of scribal variation through graph analysis of texts and stemmata}, volume = {28}, issn = {0268-1145}, shorttitle = {Beyond the tree of texts}, url = {https://doi.org/10.1093/llc/fqt032}, doi = {10.1093/llc/fqt032}, abstract = {Stemmatology, or the reconstruction of the transmission history of texts, is a field that stands particularly to gain from digital methods. Many scholars already take stemmatic approaches that rely heavily on computational analysis of the collated text (e.g. Robinson and O’Hara 1996; Salemans 2000; Heikkilä 2005; Windram et al. 2008 among many others). Although there is great value in computationally assisted stemmatology, providing as it does a reproducible result and allowing access to the relevant methodological process in related fields such as evolutionary biology, computational stemmatics is not without its critics. The current state-of-the-art effectively forces scholars to choose between a preconceived judgment of the significance of textual differences (the Lachmannian or neo-Lachmannian approach, and the weighted phylogenetic approach) or to make no judgment at all (the unweighted phylogenetic approach). Some basis for judgment of the significance of variation is sorely needed for medieval text criticism in particular. By this, we mean that there is a need for a statistical empirical profile of the text-genealogical significance of the different sorts of variation in different sorts of medieval texts. The rules that apply to copies of Greek and Latin classics may not apply to copies of medieval Dutch story collections; the practices of copying authoritative texts such as the Bible will most likely have been different from the practices of copying the Lives of local saints and other commonly adapted texts. 
It is nevertheless imperative that we have a consistent, flexible, and analytically tractable model for capturing these phenomena of transmission. In this article, we present a computational model that captures most of the phenomena of text variation, and a method for analysis of one or more stemma hypotheses against the variation model. We apply this method to three ‘artificial traditions’ (i.e. texts copied under laboratory conditions by scholars to study the properties of text variation) and four genuine medieval traditions whose transmission history is known or deduced in varying degrees. Although our findings are necessarily limited by the small number of texts at our disposal, we demonstrate here some of the wide variety of calculations that can be made using our model. Certain of our results call sharply into question the utility of excluding ‘trivial’ variation such as orthographic and spelling changes from stemmatic analysis.}, number = {4}, urldate = {2023-08-26}, journal = {Literary and Linguistic Computing}, author = {Andrews, Tara L. and Macé, Caroline}, month = dec, year = {2013}, pages = {504--521}, }
@book{mcgillivray_methods_2013, title = {Methods in {Latin} {Computational} {Linguistics}}, isbn = {978-90-04-26012-2}, abstract = {In Methods in Latin Computational Linguistics, Barbara McGillivray presents some of the most significant methodological foundations of the emerging field of Latin Computational Linguistics. The reader will find an overview of the computational resources and tools available for Latin and three corpus case studies covering morpho-syntactic and lexical-semantic aspects of Latin verb valency, as well as quantitative diachronic explorations of the argument realization of Latin prefixed verbs. The computational models and the multivariate data analysis techniques employed are explained with a detailed but accessible language. Barbara McGillivray convincingly shows the challenges and opportunities of combining computational methods and historical language data, and contributes to driving the technological change that is affecting Historical Linguistics and the Humanities.}, language = {en}, publisher = {BRILL}, author = {McGillivray, Barbara}, month = nov, year = {2013}, note = {Google-Books-ID: 33dfAgAAQBAJ}, keywords = {Computers / Artificial Intelligence / Natural Language Processing, Language Arts \& Disciplines / Linguistics / Etymology, Language Arts \& Disciplines / Linguistics / General, Language Arts \& Disciplines / Linguistics / Historical \& Comparative}, }
@article{coffee_tesserae_2013, title = {The {Tesserae} {Project}: intertextual analysis of {Latin} poetry}, volume = {28}, issn = {0268-1145, 1477-4615}, shorttitle = {The {Tesserae} {Project}}, url = {https://academic.oup.com/dsh/article-lookup/doi/10.1093/llc/fqs033}, doi = {10.1093/llc/fqs033}, abstract = {Tesserae is a web-based tool for automatically detecting allusions in Latin poetry. Although still in the start-up phase, it already is capable of identifying significant numbers of known allusions, as well as similar numbers of allusions previously unnoticed by scholars. In this article, we use the tool to examine allusions to Vergil’s Aeneid in the first book of Lucan’s Civil War. Approximately 3,000 linguistic parallels returned by the program were compared with a list of known allusions drawn from commentaries. Each was examined individually and graded for its literary significance, in order to benchmark the program’s performance. All allusions from the program and commentaries were then pooled in order to examine broad patterns in Lucan’s allusive techniques which were largely unapproachable without digital methods. Although Lucan draws relatively constantly from Vergil’s generic language in order to maintain the epic idiom, this baseline is punctuated by clusters of pointed allusions, in which Lucan frequently subverts Vergil’s original meaning. These clusters not only attend the most significant characters and events but also play a role in structuring scene transitions. Work is under way to incorporate the ability to match on word meaning, phrase context, as well as metrical and phonological features into future versions of the program.}, language = {en}, number = {2}, urldate = {2023-08-26}, journal = {Literary and Linguistic Computing}, author = {Coffee, N. and Koenig, J.-P. and Poornima, S. and Forstall, C. W. and Ossewaarde, R. and Jacobson, S. L.}, month = jun, year = {2013}, pages = {221--228}, }
@inproceedings{koppel_automatically_2013, address = {Seattle, Washington, USA}, title = {Automatically {Identifying} {Pseudepigraphic} {Texts}}, url = {https://aclanthology.org/D13-1151}, urldate = {2023-07-23}, booktitle = {Proceedings of the 2013 {Conference} on {Empirical} {Methods} in {Natural} {Language} {Processing}}, publisher = {Association for Computational Linguistics}, author = {Koppel, Moshe and Seidman, Shachar}, month = oct, year = {2013}, pages = {1449--1454}, }
@article{martinez_partspeech_2012, title = {Part‐of‐speech tagging}, volume = {4}, issn = {1939-5108}, url = {https://wires.onlinelibrary.wiley.com/doi/pdfdirect/10.1002/wics.195}, number = {1}, journal = {Wiley Interdisciplinary Reviews: Computational Statistics}, author = {Martinez, Angel R}, year = {2012}, note = {Publisher: Wiley Online Library}, pages = {107--113}, }
@article{crane_introduction_2012, title = {Introduction to the special issue on corpus and computational linguistics, philology, and the linguistic heritage of humanity}, volume = {5}, issn = {1556-4673}, url = {https://doi.org/10.1145/2160165.2160166}, doi = {10.1145/2160165.2160166}, abstract = {The articles in this issue make two complementary assertions: first, language and linguistic sources are a key element of human cultural heritage and, second, we need to integrate the ancient goals of philology with rapidly emerging methods from fields such as Corpus and Computational Linguistics. The first 15,000,000 volumes digitized by Google contained data from more than 400 languages covering more than four thousand years of the human record. We need to develop methods to explore linguistic changes and the ideas that languages encode as these evolve and circulate over millennia and on a global scale.}, number = {1}, urldate = {2023-08-26}, journal = {Journal on Computing and Cultural Heritage}, author = {Crane, Gregory and Lüdeling, Anke}, month = apr, year = {2012}, keywords = {Computational linguistics, corpus linguistics, philology}, pages = {1:1--1:5}, }
@inproceedings{buchler_increasing_2012, address = {Berlin, Heidelberg}, series = {Lecture {Notes} in {Computer} {Science}}, title = {Increasing {Recall} for {Text} {Re}-use in {Historical} {Documents} to {Support} {Research} in the {Humanities}}, isbn = {978-3-642-33290-6}, doi = {10.1007/978-3-642-33290-6_11}, abstract = {High precision text re-use detection allows humanists to discover where and how particular authors are quoted (e.g., the different sections of Plato’s work that come in and out of vogue). This paper reports on on-going work to provide the high recall text re-use detection that humanists often demand. Using an edition of one Greek work that marked quotations and paraphrases from the Homeric epics as our testbed, we were able to achieve a recall of at least 94\% while maintaining a precision of 73\%. This particular study is part of a larger effort to detect text re-use across 15 million words of Greek and 10 million words of Latin available or under development as openly licensed TEI XML.}, language = {en}, booktitle = {Theory and {Practice} of {Digital} {Libraries}}, publisher = {Springer}, author = {Büchler, Marco and Crane, Gregory and Moritz, Maria and Babeu, Alison}, editor = {Zaphiris, Panayiotis and Buchanan, George and Rasmussen, Edie and Loizides, Fernando}, year = {2012}, keywords = {Athenaeus, Homer, historical text re-use, hypertextuality}, pages = {95--100}, }
@article{mimno_computational_2012, title = {Computational historiography: {Data} mining in a century of classics journals}, volume = {5}, issn = {1556-4673, 1556-4711}, shorttitle = {Computational historiography}, url = {https://dl.acm.org/doi/10.1145/2160165.2160168}, doi = {10.1145/2160165.2160168}, abstract = {More than a century of modern Classical scholarship has created a vast archive of journal publications that is now becoming available online. Most of this work currently receives little, if any, attention. The collection is too large to be read by any single person and mostly not of sufficient interest to warrant traditional close reading. This article presents computational methods for identifying patterns and testing hypotheses about Classics as a field. Such tools can help organize large collections, introduce younger scholars to the history of the field, and act as a “survey,” identifying anomalies that can be explored using more traditional methods.}, language = {en}, number = {1}, urldate = {2023-08-26}, journal = {Journal on Computing and Cultural Heritage}, author = {Mimno, David}, month = apr, year = {2012}, pages = {1--19}, }
@article{korkiakangas_challenges_2011, title = {Challenges in {Annotating} {Medieval} {Latin} {Charters}}, volume = {26}, number = {2}, journal = {Journal for Language Technology and Computational Linguistics (JLCL)}, author = {Korkiakangas, T.}, year = {2011}, pages = {105--116}, }
@inproceedings{bamman_ancient_2011, address = {Berlin, Heidelberg}, series = {Theory and {Applications} of {Natural} {Language} {Processing}}, title = {The {Ancient} {Greek} and {Latin} {Dependency} {Treebanks}}, isbn = {978-3-642-20227-8}, doi = {10.1007/978-3-642-20227-8_5}, abstract = {This paper describes the development, composition, and several uses of the Ancient Greek and Latin Dependency Treebanks, large collections of Classical texts in which the syntactic, morphological and lexical information for eachword is made explicit. To date, over 200 individuals from around the world have collaborated to annotate over 350,000 words, including the entirety of Homer’s Iliad and Odyssey, Sophocles’ Ajax, all of the extant works of Hesiod and Aeschylus, and selections from Caesar, Cicero, Jerome, Ovid, Petronius, Propertius, Sallust and Vergil. While perhaps the most straightforward value of such an annotated corpus for Classical philology is the morphosyntactic searching it makes possible, it also enables a large number of downstream tasks as well, such as inducing the syntactic behavior of lexemes and automatically identifying similar passages between texts.}, language = {en}, booktitle = {Language {Technology} for {Cultural} {Heritage}}, publisher = {Springer}, author = {Bamman, David and Crane, Gregory}, editor = {Sporleder, Caroline and van den Bosch, Antal and Zervanou, Kalliopi}, year = {2011}, keywords = {Ancient Greek, Latin, dependency grammar, digital libraries, treebanks}, pages = {79--98}, }
@incollection{wenger_communities_2010, address = {London}, title = {Communities of {Practice} and {Social} {Learning} {Systems}: the {Career} of a {Concept}}, isbn = {978-1-84996-133-2}, shorttitle = {Communities of {Practice} and {Social} {Learning} {Systems}}, url = {https://doi.org/10.1007/978-1-84996-133-2_11}, abstract = {The concept of community of practice was not born in the systems theory tradition. It has its roots in attempts to develop accounts of the social nature of human learning inspired by anthropology and social theory (Lave, 1988; Bourdieu, 1977; Giddens, 1984; Foucault, 1980; Vygotsky, 1978). But the concept of community of practice is well aligned with the perspective of systems traditions. A community of practice itself can be viewed as a simple social system. And a complex social system can be viewed as constituted by interrelated communities of practice. In this essay I first explore the systemic nature of the concept at these two levels. Then I use this foundation to look at the applications of the concept, some of its main critiques, and its potential for developing a social discipline of learning.}, language = {en}, urldate = {2025-02-02}, booktitle = {Social {Learning} {Systems} and {Communities} of {Practice}}, publisher = {Springer}, author = {Wenger, Etienne}, editor = {Blackmore, Chris}, year = {2010}, doi = {10.1007/978-1-84996-133-2_11}, pages = {179--198}, }
@misc{rehurek_software_2010, address = {Valletta, MT}, title = {Software {Framework} for {Topic} {Modelling} with {Large} {Corpora}}, copyright = {LGPL-2.1}, shorttitle = {Gensim}, url = {http://is.muni.cz/publication/884893/en}, abstract = {Topic Modelling for Humans}, urldate = {2024-02-27}, publisher = {University of Malta}, author = {Řehůřek, Radim and Sojka, Petr}, month = may, year = {2010}, note = {Pages: 45–50 Series: Proceedings of LREC 2010 workshop New Challenges for NLP Frameworks original-date: 2011-02-10T07:43:04Z}, }
@article{heyer_challenges_2010, title = {Some {Challenges} {Posed} to {Computer} {Science} by the {eHumanities}}, url = {https://dl.gi.de/server/api/core/bitstreams/d87b4505-32b1-4f27-97cd-3f20aed8d4be/content}, language = {en}, author = {Heyer, Gerhard and Büchler, Marco}, year = {2010}, pages = {524--529}, }
@article{neron_proper_2010, title = {Proper {Generalized} {Decomposition} for {Multiscale} and {Multiphysics} {Problems}}, volume = {17}, issn = {1886-1784}, url = {https://doi.org/10.1007/s11831-010-9053-2}, doi = {10.1007/s11831-010-9053-2}, abstract = {This paper is a review of the developments of the Proper Generalized Decomposition (PGD) method for the resolution, using the multiscale/multiphysics LATIN method, of the nonlinear, time-dependent problems ((visco)plasticity, damage, …) encountered in computational mechanics. PGD leads to considerable savings in terms of computing time and storage, and makes engineering problems which would otherwise be completely out of range of industrial codes accessible.}, language = {en}, number = {4}, urldate = {2023-08-26}, journal = {Archives of Computational Methods in Engineering}, author = {Néron, David and Ladevèze, Pierre}, month = dec, year = {2010}, keywords = {Discontinuous Galerkin Scheme, Domain Decomposition Method, Model Reduction Technique, Proper Generalize Decomposi, Reference Problem}, pages = {351--372}, }
@article{ladeveze_latin_2010, title = {The {LATIN} multiscale computational method and the {Proper} {Generalized} {Decomposition}}, volume = {199}, issn = {00457825}, url = {https://linkinghub.elsevier.com/retrieve/pii/S0045782509002643}, doi = {10.1016/j.cma.2009.06.023}, abstract = {This paper deals with the synergy between the LATIN multiscale method and what is called the Proper Generalized Decomposition (PGD) which is the key of its performances.}, language = {en}, number = {21-22}, urldate = {2023-08-26}, journal = {Computer Methods in Applied Mechanics and Engineering}, author = {Ladevèze, P. and Passieux, J.-C. and Néron, D.}, month = apr, year = {2010}, pages = {1287--1296}, }
@article{bamman_computational_2009, title = {Computational {Linguistics} and {Classical} {Lexicography}}, copyright = {© 2009. This work is published under http://creativecommons.org/licenses/by-nd/4.0/ (the “License”). Notwithstanding the ProQuest Terms and Conditions, you may use this content in accordance with the terms of the License.}, url = {https://www.proquest.com/docview/2555208382?pq-origsite=gscholar&fromopenview=true}, abstract = {Manual lexicography has produced extraordinary results for Greek and Latin, but it cannot in the immediate future provide for all texts the same level of coverage available for the most heavily studied materials. As we build a cyberinfrastructure for Classics in the future, we must explore the role that automatic methods can play within it. Using technologies inherited from the disciplines of computational linguistics and computer science, we can create a complement to these traditional reference works - a dynamic lexicon that presents statistical information about a word’s usage in context, including information about its sense distribution within various authors, genres and eras, and syntactic information as well.}, language = {English}, urldate = {2023-08-26}, author = {Bamman, David and Crane, Gregory}, year = {2009}, note = {Section: Articles}, }
@phdthesis{ehrmann_entitees_2008, address = {Paris}, title = {Les {Entités} {Nommées}, de la linguistique au {TAL}: {Statut} théorique et méthodes de désambiguïsation}, url = {https://hal.science/tel-01639190v1/document}, language = {fr}, school = {UNIVERSITE PARIS 7 - DENIS DIDEROT}, author = {Ehrmann, Maud}, year = {2008}, }
@article{bamman_case_2008, title = {A {Case} {Study} in {Treebank} {Collaboration} and {Comparison} : {Accusativus} {Cum} {Infinitivo} and {Subordination} in {Latin}}, volume = {90}, journal = {The Prague bulletin of mathematical linguistics}, author = {Bamman, David and Passarotti, Marco and Crane, Gregory R.}, year = {2008}, pages = {109--122}, }
@article{poibeau_dealing_2006, title = {Dealing with {Metonymic} {Readings} of {Named} {Entities}}, url = {https://arxiv.org/pdf/cs/0607052}, abstract = {Dealing with Metonymic Readings of Named Entities Thierry Poibeau (thierry.poibeau@lipn.univ-paris13.fr) Laboratoire d’Informatique de Paris-Nord, Universite Paris 13 and UMR CNRS 7030 99, avenue Jean-Baptiste Clement – 93430 Villetaneuse – France detail our knowledge representation framework, allowing to dynamically compute the semantics of NE sequences from their immediate context. Lastly, we present an implementation and some experiments using the French ESTER corpus and showing significant improvements. Abstract The aim of this paper is to propose a method for tagging named entities (NE), using natural language processing techniques. Beyond their literal meaning, named entities are frequently subject to metonymy. We show the limits of current NE type hierarchies and detail a new proposal aiming at dynamically capturing the semantics of entities in context. This model can analyze complex linguistic phenomena like metonymy, which are known to be difficult for natural language processing but crucial for most applications. We present an implementation and some test using the French ESTER corpus and give significant results. Names, categorization and reference There is a kind of consensus on the fact that categorization and reference of linguistic expressions are related to discrete-continuous space interplay. Categorization is the ability to select parts of the environment and classify them as instances of concepts. The process of attention is then the ability to specifically focus on a part of the observation space that is relevant in a given context (Cruse and Croft, 2004). Selected parts of the observation space is said to be salient. 
Two important linguistic phenomena are based on a shift in the meaning profile of a word: the highlighting of its different facets and the phenomenon of metonymy (Nunberg, 1995) (Fass, 1997). A metonymy denotates a different concept than the “literal” denotation of a word, whereas the notion of facet only means focusing on a specific aspect of a concept (different parts of the meaning space of a word or “different ways of looking at the same thing”). However, both phenomena correspond to a semantic shift in interpretation (“profile shift”) that appear to be a function of salience (Cruse and Croft, 2004). In this section, we examine different theories concerning this topic, especially the model proposed by Pustejovsky (1995). We then discuss the case of NEs and examine previous work dealing with related questions using Natural Language Processing techniques. Keywords: Metonymy; Named Entities; Categoriza- tion; Semantics; Natural Language Processing. Introduction Categorization is a key question in science and philosophy at least since Aristotle. Many research efforts have been made on this issue in linguistics since text understanding and more generally, reasoning or inferring largely require a precise identification of objects referred to in discourse. Lexical semantics has attracted the major part of research related to these issues in linguistics in the last few years. What is the meaning of an expression? How does it change in context? These are still open questions. Many research projects have addressed the issue of proper name identification in newspaper texts, especially the Message Understanding Conferences (MUC-6, 1995). In these conferences, the first task to achieve is to identify named entities (NE), i.e. proper names, temporal and numerical expressions. This task is generally accomplished according to a pre-defined hierarchy of entity categories. 
The categorization process relies on the assumption that NEs directly refer to external objects and can thus be easily categorized. In this paper, we show that this assumption is an over-simplification of the problem: many entities are ambiguous and inter-annotator agreement is dramatically low for some categories. We assume that even if NE tagging achieves good performances (over .90 rate of combined precision and recall is frequent on journalistic corpora), NEs are intrinsically ambiguous and cause numerous categorization problems. We propose a new dynamic representation framework in which it is possible to specify the meaning of a NE from its context. In the paper, we report previous work on NE tagging. We then show different cases of polysemous entities in context and some considerations about their referential status. We Pustejovsky’s Generative lexicon (1995) Pustejovsky developed an interesting model for sense selection in context (1995). His proposal – the Generative Lexicon – is based on Davidson's logic model and a strict typed theory developed in Pustejovsky (1995) and more recently in Asher and Pustejovsky (1999). Words like book are called dot object : “dot” is a function enabling to encode two facets of a given word. A book is by default a physical object but some verbs like read or enjoy might activate specific features that coerce the initial type: book then no longer refers to a physical object but to its content (through its “telic role” encoded in a complex structured called the qualia structure). Moreover, complex operations related to the same process explain why John enjoyed his book is interpreted as an ellipsis and imply reading a book.}, urldate = {2025-01-26}, journal = {ArXiv}, author = {Poibeau, Thierry}, year = {2006}, pages = {1--6}, }
@article{binongo_application_1999, title = {The application of principal component analysis to stylometry}, volume = {14}, issn = {0268-1145}, url = {https://doi.org/10.1093/llc/14.4.445}, doi = {10.1093/llc/14.4.445}, abstract = {In recent years principal component analysis has become popular for investigations in computational stylistics, particularly for studies of authorship. The mathematical nature of the theory that underpins the method makes it rather inaccessible to linguists and literary scholars. Consequently, confidence in its correct application is diminished. By first restricting the procedure to the use of two marker words, a pictorial description of its operation is derived. Some characteristics of the method are then examined. Finally, in the context of a Shakespearean example the technique is extended to p words, and suggestions are advanced to alleviate possible shortcomings.}, number = {4}, urldate = {2023-07-23}, journal = {Literary and Linguistic Computing}, author = {Binongo, JNG and Smith, MWA}, month = dec, year = {1999}, pages = {445--466}, }
@article{gulordava_diachronic_nodate, title = {Diachronic {Trends} in {Word} {Order} {Freedom} and {Dependency} {Length} in {Dependency}-{Annotated} {Corpora} of {Latin} and {Ancient} {Greek}}, url = {https://aclanthology.org/W15-2115.pdf}, author = {Gulordava, Kristina and Merlo, Paola}, year = {2015}, }
@book{noauthor_diachronic_nodate, address = {Amsterdam; Philadelphia}, series = {Benjamins {Current} {Topics}}, title = {Diachronic {Treebanks} for {Historical} {Linguistics}}, doi = {10.1075/bct.113}, number = {113}, publisher = {Benjamins}, }
@article{kabala_computational_nodate, title = {Computational {Authorship} {Attribution} in {Medieval} {Latin} {Corpora}: {The} {Case} of the {Monk} of {Lido} (ca. 1101–08) and {Gallus} {Anonymous} (ca. 1113–17)}, volume = {54}, doi = {10.1007/s10579-018-9424-0}, number = {1}, journal = {Language resources and evaluation}, author = {Kabala, Jakub}, pages = {25--56}, }
@techreport{martinez_garcia_latin-spanish_nodate, title = {Latin-{Spanish} {Neural} {Machine} {Translation}: {From} the {Bible} to {Saint} {Augustine}}, number = {Proceedings of the LREC 2020 1st Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA 2020)}, author = {Martínez Garcia, Eva and García-Tejedor, Álvaro J.}, pages = {94--99}, }
@article{janicke_visualizations_nodate, title = {Visualizations for {Text} {Re}-use}, url = {https://www.informatik.uni-leipzig.de/~stjaenicke/Visualizations_for_Text-Reuse.pdf}, language = {en}, author = {Jänicke, Stefan and Geßner, Annette and Büchler, Marco and Scheuermann, Gerik}, }
@article{silvia_visualizing_nodate, title = {Visualizing {Variation} in {Classical} {Text} with {Force} {Directed} {Storylines}}, abstract = {The study of literature is changing dramatically by incorporating new opportunities that digital technology presents. Data visualization overturns the dynamic for literary analysis by revealing and displaying connections and patterns between elements in text. Literary scholars compare and analyze textual variations in different versions of a lost original text and work to reconstruct the original text in the form of a critical edition. A critical edition notes textual variations in extensive footnotes, collectively called a critical apparatus. Information in the apparatus is of great interest to scholars who seek to explore complex relationships between text versions. Motivated by application to classical Latin texts, we adapted the storyline technique to visualize a critical apparatus. The visualization facilitates guided discovery of similarities and dissimilarities between prior text versions, which are difficult to detect and reason about with traditional deep reading and spreadsheet-based methods. Storyline visualizations help users understand and analyze the interactions between entities in a story and explore how entity relationships evolve over time. Typical design considerations in existing storyline techniques include minimizing line crossing and line wiggling, which are computationally intense problems. Generating storyline layouts in real time is a substantial challenge to interactive visualization. Existing storyline techniques support limited user interaction due to the high cost of layout. We contribute an initial force directed layout algorithm that dynamically reflows storyline layouts with best effort response to internal and coordinated interactions. We anticipate that the characteristics of our layout algorithm will allow for graceful response to a wide variety of interaction types, speeds, and patterns. 
We conducted a user study to evaluate the legibility of our storyline layout after convergence. The evaluation results demonstrate that most users can accurately complete a wide variety of visual metaphor interpretation, reading, and pattern recognition tasks within 20 seconds.}, language = {en}, author = {Silvia, Shejuti and Etemadpour, Ronak and Abbas, June and Huskey, Sam and Weaver, Chris}, }