SEFLAG: Systematic Evaluation Framework for NLP Models and Datasets in Latin and Ancient Greek. Schulz, K. & Deichsler, F. In Hämäläinen, M., Öhman, E., Miyagawa, S., Alnajjar, K., & Bizzoni, Y., editors, Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities, pages 247–258, Miami, USA, November, 2024. Association for Computational Linguistics. Abstract: Literary scholars of Latin and Ancient Greek increasingly use natural language processing for their work, but many models and datasets are hard to use due to a lack of sustainable research data management. This paper introduces the Systematic Evaluation Framework for natural language processing models and datasets in Latin and Ancient Greek (SEFLAG), which consistently assesses language resources using common criteria, such as specific evaluation metrics, metadata and risk analysis. The framework, a work in progress in its initial phase, currently covers lemmatization and named entity recognition for both languages, with plans for adding dependency parsing and other tasks. For increased transparency and sustainability, a thorough documentation is included as well as an integration into the HuggingFace ecosystem. The combination of these efforts is designed to support researchers in their search for suitable models.
% Conference paper introducing SEFLAG (Schulz and Deichsler, 2024); URL points to the ACL Anthology record.
@inproceedings{schulz_seflag_2024,
  author     = {Schulz, Konstantin and Deichsler, Florian},
  title      = {{SEFLAG}: {Systematic} {Evaluation} {Framework} for {NLP} {Models} and {Datasets} in {Latin} and {Ancient} {Greek}},
  shorttitle = {{SEFLAG}},
  booktitle  = {Proceedings of the 4th {International} {Conference} on {Natural} {Language} {Processing} for {Digital} {Humanities}},
  editor     = {Hämäläinen, Mika and Öhman, Emily and Miyagawa, So and Alnajjar, Khalid and Bizzoni, Yuri},
  publisher  = {Association for Computational Linguistics},
  address    = {Miami, USA},
  month      = nov,
  year       = {2024},
  pages      = {247--258},
  url        = {https://aclanthology.org/2024.nlp4dh-1.24},
  urldate    = {2024-11-12},
  abstract   = {Literary scholars of Latin and Ancient Greek increasingly use natural language processing for their work, but many models and datasets are hard to use due to a lack of sustainable research data management. This paper introduces the Systematic Evaluation Framework for natural language processing models and datasets in Latin and Ancient Greek (SEFLAG), which consistently assesses language resources using common criteria, such as specific evaluation metrics, metadata and risk analysis. The framework, a work in progress in its initial phase, currently covers lemmatization and named entity recognition for both languages, with plans for adding dependency parsing and other tasks. For increased transparency and sustainability, a thorough documentation is included as well as an integration into the HuggingFace ecosystem. The combination of these efforts is designed to support researchers in their search for suitable models.},
}
Downloads: 0
{"_id":"5wp5gbqn2f8rbFfeq","bibbaseid":"schulz-deichsler-seflagsystematicevaluationframeworkfornlpmodelsanddatasetsinlatinandancientgreek-2024","author_short":["Schulz, K.","Deichsler, F."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","address":"Miami, USA","title":"SEFLAG: Systematic Evaluation Framework for NLP Models and Datasets in Latin and Ancient Greek","shorttitle":"SEFLAG","url":"https://aclanthology.org/2024.nlp4dh-1.24","abstract":"Literary scholars of Latin and Ancient Greek increasingly use natural language processing for their work, but many models and datasets are hard to use due to a lack of sustainable research data management. This paper introduces the Systematic Evaluation Framework for natural language processing models and datasets in Latin and Ancient Greek (SEFLAG), which consistently assesses language resources using common criteria, such as specific evaluation metrics, metadata and risk analysis. The framework, a work in progress in its initial phase, currently covers lemmatization and named entity recognition for both languages, with plans for adding dependency parsing and other tasks. For increased transparency and sustainability, a thorough documentation is included as well as an integration into the HuggingFace ecosystem. 
The combination of these efforts is designed to support researchers in their search for suitable models.","urldate":"2024-11-12","booktitle":"Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities","publisher":"Association for Computational Linguistics","author":[{"propositions":[],"lastnames":["Schulz"],"firstnames":["Konstantin"],"suffixes":[]},{"propositions":[],"lastnames":["Deichsler"],"firstnames":["Florian"],"suffixes":[]}],"editor":[{"propositions":[],"lastnames":["Hämäläinen"],"firstnames":["Mika"],"suffixes":[]},{"propositions":[],"lastnames":["Öhman"],"firstnames":["Emily"],"suffixes":[]},{"propositions":[],"lastnames":["Miyagawa"],"firstnames":["So"],"suffixes":[]},{"propositions":[],"lastnames":["Alnajjar"],"firstnames":["Khalid"],"suffixes":[]},{"propositions":[],"lastnames":["Bizzoni"],"firstnames":["Yuri"],"suffixes":[]}],"month":"November","year":"2024","pages":"247–258","bibtex":"@inproceedings{schulz_seflag_2024,\n\taddress = {Miami, USA},\n\ttitle = {{SEFLAG}: {Systematic} {Evaluation} {Framework} for {NLP} {Models} and {Datasets} in {Latin} and {Ancient} {Greek}},\n\tshorttitle = {{SEFLAG}},\n\turl = {https://aclanthology.org/2024.nlp4dh-1.24},\n\tabstract = {Literary scholars of Latin and Ancient Greek increasingly use natural language processing for their work, but many models and datasets are hard to use due to a lack of sustainable research data management. This paper introduces the Systematic Evaluation Framework for natural language processing models and datasets in Latin and Ancient Greek (SEFLAG), which consistently assesses language resources using common criteria, such as specific evaluation metrics, metadata and risk analysis. The framework, a work in progress in its initial phase, currently covers lemmatization and named entity recognition for both languages, with plans for adding dependency parsing and other tasks. 
For increased transparency and sustainability, a thorough documentation is included as well as an integration into the HuggingFace ecosystem. The combination of these efforts is designed to support researchers in their search for suitable models.},\n\turldate = {2024-11-12},\n\tbooktitle = {Proceedings of the 4th {International} {Conference} on {Natural} {Language} {Processing} for {Digital} {Humanities}},\n\tpublisher = {Association for Computational Linguistics},\n\tauthor = {Schulz, Konstantin and Deichsler, Florian},\n\teditor = {Hämäläinen, Mika and Öhman, Emily and Miyagawa, So and Alnajjar, Khalid and Bizzoni, Yuri},\n\tmonth = nov,\n\tyear = {2024},\n\tpages = {247--258},\n}\n\n","author_short":["Schulz, K.","Deichsler, F."],"editor_short":["Hämäläinen, M.","Öhman, E.","Miyagawa, S.","Alnajjar, K.","Bizzoni, Y."],"key":"schulz_seflag_2024","id":"schulz_seflag_2024","bibbaseid":"schulz-deichsler-seflagsystematicevaluationframeworkfornlpmodelsanddatasetsinlatinandancientgreek-2024","role":"author","urls":{"Paper":"https://aclanthology.org/2024.nlp4dh-1.24"},"metadata":{"authorlinks":{}}},"bibtype":"inproceedings","biburl":"https://api.zotero.org/users/912485/collections/VL2FMYT9/items?key=tJHJRX0dfdnwe3iuxbtyH4ht&format=bibtex&limit=100","dataSources":["nsAxma8Gv9c6fai5n","JFDnASMkoQCjjGL8E"],"keywords":[],"search_terms":["seflag","systematic","evaluation","framework","nlp","models","datasets","latin","ancient","greek","schulz","deichsler"],"title":"SEFLAG: Systematic Evaluation Framework for NLP Models and Datasets in Latin and Ancient Greek","year":2024}