Bibliography generated by bibbase.org from a Zotero collection, grouped by author.
Allison, S. (1)

Authorship After AI. Allison, S. Public Books, June 2019.

@article{allison_authorship_2019,
    title = {Authorship {After} {AI}},
    url = {https://www.publicbooks.org/authorship-after-ai/},
    abstract = {Authorship attribution is helpful if you suspect fraud: for instance, if you believe that Shakespeare wasn’t educated enough to write the plays, or that Charlotte Brontë’s Jane Eyre was really ...},
    language = {en-US},
    urldate = {2019-06-27},
    journal = {Public Books},
    author = {Allison, Sarah},
    month = jun,
    year = {2019},
    keywords = {act\_StylisticAnalysis, goal\_Analysis, obj\_Literature, obj\_Persons},
}
Antonacopoulos, A. (1)

A survey of OCR evaluation tools and metrics. Neudecker, C.; Baierer, K.; Gerber, M.; Clausner, C.; Antonacopoulos, A.; and Pletschacher, S. In The 6th International Workshop on Historical Document Imaging and Processing, pages 13–18. Association for Computing Machinery, New York, NY, USA, September 2021.

@incollection{neudecker_survey_2021,
    address = {New York, NY, USA},
    title = {A survey of {OCR} evaluation tools and metrics},
    isbn = {978-1-4503-8690-6},
    url = {https://doi.org/10.1145/3476887.3476888},
    abstract = {The millions of pages of historical documents that are digitized in libraries are increasingly used in contexts that have more specific requirements for OCR quality than keyword search. How to comprehensively, efficiently and reliably assess the quality of OCR results against the background of mass digitization, when ground truth can only ever be produced for very small numbers? Due to gaps in specifications, results from OCR evaluation tools can return different results, and due to differences in implementation, even commonly used error rates are often not directly comparable. OCR evaluation metrics and sampling methods are also not sufficient where they do not take into account the accuracy of layout analysis, since for advanced use cases like Natural Language Processing or the Digital Humanities, accurate layout analysis and detection of the reading order are crucial. We provide an overview of OCR evaluation metrics and tools, describe two advanced use cases for OCR results, and perform an OCR evaluation experiment with multiple evaluation tools and different metrics for two distinct datasets. We analyze the differences and commonalities in light of the presented use cases and suggest areas for future work.},
    urldate = {2022-02-03},
    booktitle = {The 6th {International} {Workshop} on {Historical} {Document} {Imaging} and {Processing}},
    publisher = {Association for Computing Machinery},
    author = {Neudecker, Clemens and Baierer, Konstantin and Gerber, Mike and Clausner, Christian and Antonacopoulos, Apostolos and Pletschacher, Stefan},
    month = sep,
    year = {2021},
    keywords = {accuracy, evaluation, metrics, optical character recognition},
    pages = {13--18},
}
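The "commonly used error rates" compared in this survey are, at base, normalized edit distances. As a minimal illustration (a sketch, not code from the paper), the character error rate (CER) is the Levenshtein distance between the ground truth and the OCR output, divided by the ground-truth length:

    # Character error rate: edit distance / ground-truth length.
    def cer(ground_truth: str, ocr_output: str) -> float:
        m, n = len(ground_truth), len(ocr_output)
        prev = list(range(n + 1))  # distances against the empty prefix
        for i in range(1, m + 1):
            curr = [i] + [0] * n
            for j in range(1, n + 1):
                cost = 0 if ground_truth[i - 1] == ocr_output[j - 1] else 1
                curr[j] = min(prev[j] + 1,         # deletion
                              curr[j - 1] + 1,     # insertion
                              prev[j - 1] + cost)  # substitution
            prev = curr
        return prev[n] / m if m else 0.0

    print(cer("Historical Document", "Histoncal Docoment"))  # 3 edits / 19 chars = 0.157...

The word error rate (WER) is the same computation over token sequences instead of characters; as the abstract notes, differences in implementation, normalization, and layout handling mean even these simple rates are often not directly comparable across tools.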
Baierer, K. (1)

A survey of OCR evaluation tools and metrics. Neudecker, C.; Baierer, K.; Gerber, M.; Clausner, C.; Antonacopoulos, A.; and Pletschacher, S. In The 6th International Workshop on Historical Document Imaging and Processing, pages 13–18. Association for Computing Machinery, New York, NY, USA, September 2021. (Full record above under Antonacopoulos, A.)
Bode, K. (1)

The Equivalence of “Close” and “Distant” Reading; or, Toward a New Object for Data-Rich Literary History. Bode, K. Modern Language Quarterly, 78(1): 77–106. 2017. Place: Seattle. Publisher: Duke University Press.

@article{bode_equivalence_2017,
    title = {The {Equivalence} of “{Close}” and “{Distant}” {Reading}; or, {Toward} a {New} {Object} for {Data}-{Rich} {Literary} {History}},
    volume = {78},
    issn = {0026-7929},
    doi = {10.1215/00267929-3699787},
    abstract = {The approaches to data-rich literary history that dominate academic and public debate—Franco Moretti’s “distant reading” and Matthew Jockers’s “macroanalysis”—model literary systems in limited, abstract, and often ahistorical ways. This problem arises from neglect of the activities and insights of textual scholarship and is inherited from, rather than opposed to, the New Criticism and its core method of “close reading.” Literary history requires not new or integrated methods but a new scholarly object capable of managing the documentary record’s complexity, especially as manifested in emerging digital knowledge infrastructure. Building on significant, though uneven and unacknowledged, departures from Moretti’s and Jockers’s work in data-rich literary history, this essay describes such an object, modeled on the foundational technology of textual scholarship: the scholarly edition.},
    language = {eng},
    number = {1},
    journal = {Modern language quarterly (Seattle)},
    author = {Bode, Katherine},
    year = {2017},
    note = {Place: Seattle
    Publisher: Duke University Press},
    keywords = {Data mining, Data warehousing/data mining, Digital Technology, English Literature, General Literary Studies, Jocker, Matthew, Jockers, Matthew, Jockers, Matthew L, Language, Literature, and the Computer, Literary History, Literary Theory, Literary criticism, Literary histories, Literature, Methods, Moretti, Franco, Moretti, Franco (1950- ), New Criticism, Reading, Twenty-First Century, close reading, literary theory and criticism},
    pages = {77--106},
}
Bubenhofer, N. (1)

Semantische Äquivalenz in Geburtserzählungen: Anwendung von Word Embeddings. Bubenhofer, N. Zeitschrift für germanistische Linguistik, 48(3): 562–589. December 2020. Publisher: De Gruyter.

@article{bubenhofer_semantische_2020,
    title = {Semantische Äquivalenz in {Geburtserzählungen}: {Anwendung} von {Word} {Embeddings}},
    volume = {48},
    issn = {1613-0626},
    shorttitle = {Semantische Äquivalenz in {Geburtserzählungen}},
    url = {https://www.degruyter.com/document/doi/10.1515/zgl-2020-2014/html},
    doi = {10.1515/zgl-2020-2014},
    abstract = {The present study focuses on serially occurring narrations of ‘everyday’ life, more specifically on birthing as narrated by mothers on online forums; the underlying idea being that these narrations happen against the background of cultural narratives. The present paper uses word embedding models to detect typical topics and actors in these narrations. The calculation of word embeddings automatically constructs semantic spaces, where semantic relations (synonymy in particular) can be modeled. This method offers a way to think of synonymy as ‘functional equivalence in discourse’. The present study relies on previous work with n-grams (Bubenhofer, 2018). N-grams are sequences of words that often appear together; their sequential order in different narrations gives insight in narrative patterns. A further step in the analysis is the construction of ‘narrative topoi’, which is achieved through clustering neighboring vectors. The emerging clusters can in turn be grouped into five narrative elements of ‘telling a birth story’: 1) disruption of daily life, 2) personnel, 3) body, 4) fear, 5) joy. While it seems obvious that certain themes ‘belong’ into the narration of a delivery, it is less obvious with what vocabulary these themes are expressed. The presented method of clustering word-embedding-profiles adds tremendously to the modelling of a narrative. Its advantages lie in its potential to show lexical variation, as it also includes rare, non-conformative orthographical variants. Furthermore, it allows for a discourse-specific (and usage-based) view on semantic relations. The same applies to relations between semantic clusters. Seen from a discourse linguistics or cultural analysis perspective, word embeddings renew our understanding of semantics. This shows particularly fruitful if used to analyze (discourse dependent) derivations between semantic spaces.},
    language = {en},
    number = {3},
    urldate = {2022-02-03},
    journal = {Zeitschrift für germanistische Linguistik},
    author = {Bubenhofer, Noah},
    month = dec,
    year = {2020},
    note = {Publisher: De Gruyter},
    pages = {562--589},
}
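The pipeline the abstract describes (train word embeddings on the narration corpus, then cluster neighboring vectors into candidate "narrative topoi") can be sketched as follows. gensim and scikit-learn are illustrative choices rather than the author's documented tooling, and the toy corpus is invented:

    from gensim.models import Word2Vec
    from sklearn.cluster import KMeans

    # Toy corpus of tokenized birth narrations (invented); the study works
    # on forum posts at a much larger scale.
    corpus = [
        ["die", "wehen", "setzten", "mitten", "in", "der", "nacht", "ein"],
        ["die", "hebamme", "und", "der", "arzt", "blieben", "ganz", "ruhig"],
        ["angst", "wich", "freude", "als", "das", "baby", "endlich", "da", "war"],
    ]

    # Train a small embedding model on the narrations.
    model = Word2Vec(corpus, vector_size=50, window=5, min_count=1, seed=0)

    # Cluster the word vectors; clusters approximate candidate narrative topoi.
    words = model.wv.index_to_key
    labels = KMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(model.wv[words])
    for word, label in sorted(zip(words, labels), key=lambda pair: pair[1]):
        print(label, word)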
Burrows, J. (1)

‘Delta’: a Measure of Stylistic Difference and a Guide to Likely Authorship. Burrows, J. Literary and Linguistic Computing, 17(3): 267–287. 2002.

@article{burrows_delta_2002,
    title = {‘{Delta}’: a {Measure} of {Stylistic} {Difference} and a {Guide} to {Likely} {Authorship}},
    volume = {17},
    shorttitle = {‘{Delta}’},
    url = {http://llc.oxfordjournals.org/content/17/3/267.abstract},
    doi = {10.1093/llc/17.3.267},
    abstract = {This paper is a companion to my ‘Questions of authorship: attribution and beyond’, in which I sketched a new way of using the relative frequencies of the very common words for comparing written texts and testing their likely authorship. The main emphasis of that paper was not on the new procedure but on the broader consequences of our increasing sophistication in making such comparisons and the increasing (although never absolute) reliability of our inferences about authorship. My present objects, accordingly, are to give a more complete account of the procedure itself; to report the outcome of an extensive set of trials; and to consider the strengths and limitations of the new procedure. The procedure offers a simple but comparatively accurate addition to our current methods of distinguishing the most likely author of texts exceeding about 1,500 words in length. It is of even greater value as a method of reducing the field of likely candidates for texts of as little as 100 words in length. Not unexpectedly, it works least well with texts of a genre uncharacteristic of their author and, in one case, with texts far separated in time across a long literary career. Its possible use for other classificatory tasks has not yet been investigated.},
    language = {en},
    number = {3},
    urldate = {2011-07-26},
    journal = {Literary and Linguistic Computing},
    author = {Burrows, John},
    year = {2002},
    keywords = {*****, X-CHECK, obj\_Methods, t\_Stylometry},
    pages = {267--287},
}
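Burrows's Delta standardizes the relative frequencies of the most frequent words into z-scores against the comparison corpus and takes the mean absolute difference between two texts' z-score profiles; the candidate with the smallest Delta is the most likely author. A minimal sketch with invented frequencies:

    import numpy as np

    # Rows: texts (0 = disputed text, 1 and 2 = candidate author samples);
    # columns: relative frequencies of the n most frequent words (invented).
    freqs = np.array([
        [0.061, 0.042, 0.029, 0.018],
        [0.055, 0.047, 0.031, 0.015],
        [0.068, 0.038, 0.025, 0.021],
    ])

    # Standardize each word's frequency across the corpus.
    z = (freqs - freqs.mean(axis=0)) / freqs.std(axis=0)

    # Delta = mean absolute difference of z-score profiles; smaller is closer.
    for candidate in (1, 2):
        print(candidate, np.mean(np.abs(z[0] - z[candidate])))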
Chao, A. (1)

Forgotten books: The application of unseen species models to the survival of culture. Kestemont, M.; Karsdorp, F.; de Bruijn, E.; Driscoll, M.; Kapitan, K. A.; Ó Macháin, P.; Sawyer, D.; Sleiderink, R.; and Chao, A. Science, 375(6582): 765–769. February 2022. Publisher: American Association for the Advancement of Science.

@article{kestemont_forgotten_2022,
    title = {Forgotten books: {The} application of unseen species models to the survival of culture},
    volume = {375},
    shorttitle = {Forgotten books},
    url = {https://www.science.org/doi/10.1126/science.abl7655},
    doi = {10.1126/science.abl7655},
    abstract = {The study of ancient cultures is hindered by the incomplete survival of material artifacts, so we commonly underestimate the diversity of cultural production in historic societies. To correct this survivorship bias, we applied unseen species models from ecology to gauge the loss of narratives from medieval Europe, such as the romances about King Arthur. The estimates obtained are compatible with the scant historic evidence. In addition to events such as library fires, we identified the original evenness of cultural populations as an overlooked factor in these assemblages’ stability in the face of immaterial loss. We link the elevated evenness in island literatures to analogous accounts of ecological and cultural diversity in insular communities. These analyses call for a wider application of these methods across the heritage sciences.},
    number = {6582},
    urldate = {2023-06-16},
    journal = {Science},
    author = {Kestemont, Mike and Karsdorp, Folgert and de Bruijn, Elisabeth and Driscoll, Matthew and Kapitan, Katarzyna A. and Ó Macháin, Pádraig and Sawyer, Daniel and Sleiderink, Remco and Chao, Anne},
    month = feb,
    year = {2022},
    note = {Publisher: American Association for the Advancement of Science},
    pages = {765--769},
}
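The unseen species models mentioned in the abstract estimate how many classes were never observed from the frequencies of the rarely observed ones. A minimal sketch of Chao1, the standard abundance-based estimator in this family (that this exact variant matches the paper's is an assumption, and the sighting data are invented):

    from collections import Counter

    def chao1(sightings):
        """Chao1 lower bound on total richness: S_obs + f1**2 / (2 * f2),
        where f1 and f2 count classes seen exactly once and exactly twice."""
        counts = Counter(sightings)
        s_obs = len(counts)
        f1 = sum(1 for c in counts.values() if c == 1)
        f2 = sum(1 for c in counts.values() if c == 2)
        if f2 == 0:  # bias-corrected form when there are no doubletons
            return s_obs + f1 * (f1 - 1) / 2
        return s_obs + f1 ** 2 / (2 * f2)

    # Each work is a 'species', each surviving copy a 'sighting': five works
    # are attested, but an estimated 9.5 once circulated.
    print(chao1(["A", "A", "B", "C", "C", "C", "D", "E"]))  # 5 + 3**2/(2*1) = 9.5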
Clausner, C. (1)

A survey of OCR evaluation tools and metrics. Neudecker, C.; Baierer, K.; Gerber, M.; Clausner, C.; Antonacopoulos, A.; and Pletschacher, S. In The 6th International Workshop on Historical Document Imaging and Processing, pages 13–18. Association for Computing Machinery, New York, NY, USA, September 2021. (Full record above under Antonacopoulos, A.)
Driscoll, M. (1)

Forgotten books: The application of unseen species models to the survival of culture. Kestemont, M.; Karsdorp, F.; de Bruijn, E.; Driscoll, M.; Kapitan, K. A.; Ó Macháin, P.; Sawyer, D.; Sleiderink, R.; and Chao, A. Science, 375(6582): 765–769. February 2022. (Full record above under Chao, A.)
Ernst, M. (1)

Categorising Legal Records – Deductive, Pragmatic, and Computational Strategies. Ernst, M.; Gassner, S.; Gerstmeier, M.; and Rehbein, M. Digital Humanities Quarterly, 017(3). July 2023.

@article{ernst_categorising_2023,
    title = {Categorising {Legal} {Records} – {Deductive}, {Pragmatic}, and {Computational} {Strategies}},
    volume = {017},
    issn = {1938-4122},
    number = {3},
    journal = {Digital Humanities Quarterly},
    author = {Ernst, Marlene and Gassner, Sebastian and Gerstmeier, Markus and Rehbein, Malte},
    month = jul,
    year = {2023},
}
Filho, D. (1)

Introduction to historical (social) network analysis – Part I. Filho, D. V. Digital Humanities Lab (blog), n.d.

@misc{filho_introduction_nodate,
    type = {Billet},
    title = {Introduction to historical (social) network analysis – {Part} {I}},
    url = {https://dhlab.hypotheses.org/2363},
    abstract = {by Demival Vasques Filho Last September, as part of our seminar series, “60 minutes of DH”, at the IEG, I presented an introduction to historical (social) network analysis. In the talk, I gave an overview of the field’s history, discussing landmark papers, in my opinion. It was a mix of going through papers fundamental to …},
    language = {de-DE},
    urldate = {2022-01-28},
    journal = {Digital Humanities Lab},
    author = {Filho, Demival Vasques},
}
Fischer, E. (1)

Culturomics. Digitale Bibliotheken als Basis für quantitative Kulturanalysen. Fischer, E. Kodex, 1: 55–78. 2011.

@article{fischer_culturomics._2011,
    title = {Culturomics. {Digitale} {Bibliotheken} als {Basis} für quantitative {Kulturanalysen}},
    volume = {1},
    journal = {Kodex},
    author = {Fischer, Ernst},
    year = {2011},
    pages = {55--78},
}
Gassner, S. (1)

Categorising Legal Records – Deductive, Pragmatic, and Computational Strategies. Ernst, M.; Gassner, S.; Gerstmeier, M.; and Rehbein, M. Digital Humanities Quarterly, 017(3). July 2023. (Full record above under Ernst, M.)
Gerber, M. (1)

A survey of OCR evaluation tools and metrics. Neudecker, C.; Baierer, K.; Gerber, M.; Clausner, C.; Antonacopoulos, A.; and Pletschacher, S. In The 6th International Workshop on Historical Document Imaging and Processing, pages 13–18. Association for Computing Machinery, New York, NY, USA, September 2021. (Full record above under Antonacopoulos, A.)
Gerstmeier, M. (1)

Categorising Legal Records – Deductive, Pragmatic, and Computational Strategies. Ernst, M.; Gassner, S.; Gerstmeier, M.; and Rehbein, M. Digital Humanities Quarterly, 017(3). July 2023. (Full record above under Ernst, M.)
Gleim, R. (1)

Der eHumanities Desktop als Werkzeug in der historischen Semantik: Funktionsspektren und Einsatzszenarien. Mehler, A.; Schwandt, S.; Gleim, R.; and Jussen, B. Journal for Language Technology and Computational Linguistics (JCLC), 26(1). 2011.

@article{mehler_ehumanities_2011,
    title = {Der {eHumanities} {Desktop} als {Werkzeug} in der historischen {Semantik}: {Funktionsspektren} und {Einsatzszenarien}},
    volume = {26},
    issn = {2190-6858},
    shorttitle = {Der {eHumanities} {Desktop} als {Werkzeug} in der historischen {Semantik}},
    url = {https://pub.uni-bielefeld.de/record/2902769},
    language = {ger},
    number = {1},
    urldate = {2021-08-26},
    journal = {Journal for Language Technology and Computational Linguistics (JCLC)},
    author = {Mehler, Alexander and Schwandt, Silke and Gleim, Rüdiger and Jussen, Bernhard},
    year = {2011},
}
Grandjean, M. (1)

A Conceptual Framework for the Analysis of Multilayer Networks in the Humanities. Grandjean, M. Ottawa, 2020.

@inproceedings{grandjean_conceptual_2020,
    address = {Ottawa},
    title = {A {Conceptual} {Framework} for the {Analysis} of {Multilayer} {Networks} in the {Humanities}},
    url = {https://hcommons.org/deposits/item/hc:31941/},
    abstract = {If network analysis has made its way into the humanities toolbox, and especially in history, it is because it helps to grasp the complexity of the objects of these disciplines. However, to understand the multidimensionality of the data requires a consequent reflection on its modeling. This paper seeks to be part of a series of publications aimed at making advanced network analysis concepts more accessible to the humanities scholars: from ontological questions to the necessary discussion of the integration of temporality in graphs, the development of typologies of uses or attempts to provide aids to interpretation. The question of multilayer networks becomes especially more and more important, whether in a general way or applied to the humanities.},
    language = {en-US},
    urldate = {2021-10-11},
    author = {Grandjean, Martin},
    year = {2020},
}
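Common graph libraries have no dedicated multilayer type, so one pragmatic encoding of the model Grandjean discusses is a multigraph whose edges carry a layer attribute. A minimal networkx sketch; the actors, layers, and attributes are invented for illustration:

    import networkx as nx

    # Two-layer historical network: the same actors linked by correspondence
    # (one layer) and by co-membership in an organization (another layer).
    G = nx.MultiGraph()
    G.add_edge("Curie", "Einstein", layer="correspondence", year=1929)
    G.add_edge("Curie", "Einstein", layer="co-membership", org="ICIC")
    G.add_edge("Einstein", "Bergson", layer="co-membership", org="ICIC")

    # Project out a single layer to analyze it on its own.
    correspondence = nx.Graph(
        (u, v) for u, v, d in G.edges(data=True) if d["layer"] == "correspondence"
    )
    print(correspondence.edges())  # one edge: Curie-Einstein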
Harris, J. (1)

Word clouds considered harmful. Harris, J. Nieman Journalism Lab, October 2011.

@misc{harris_word_2011,
    type = {Blog},
    title = {Word clouds considered harmful},
    shorttitle = {Clouds},
    url = {http://www.niemanlab.org/2011/10/word-clouds-considered-harmful/},
    abstract = {The New York Times senior software architect would like the newest "mullets of the Internet" to go back from whence they came.},
    urldate = {2014-07-29},
    journal = {Nieman Journalism Lab},
    author = {Harris, Jacob},
    month = oct,
    year = {2011},
}
Jockers, M. (2)

Macroanalysis: digital methods and literary history. Jockers, M. L. Topics in the digital humanities. University of Illinois Press, Urbana, 2013.

@book{jockers_macroanalysis_2013,
    address = {Urbana},
    series = {Topics in the digital humanities},
    title = {Macroanalysis: digital methods and literary history},
    isbn = {978-0-252-03752-8},
    shorttitle = {Macroanalysis},
    abstract = {"The creation of enormous and inclusive databases, the digitization of literary works, and the steady improvement in search engines enable scholars in the digital humanities to ask very big questions. Using computational analysis to retrieve key words, phrases, and linguistic patterns across thousands of texts in digital libraries, researchers can draw conclusions based on quantifiable evidence regarding how literary trends are employed over time, across periods, within regions, or within demographic groups, as well as how cultural, historical, and societal linkages may bind individual authors, texts, and genres into an aggregate literary culture. In this volume, Matthew L. Jockers introduces readers to large-scale literary computing and the revolutionary potential of macroanalysis--a new approach to the study of the literary record designed for probing the digital-textual world as it exists today, in digital form and in large quantities. Moving beyond the limitations of literary interpretation based on the "close-reading" of individual works, Jockers describes how this new method of studying large collections of digital material can help us to better understand and contextualize the individual works within those collections."--Publisher's website},
    language = {eng},
    publisher = {University of Illinois Press},
    author = {Jockers, Matthew Lee},
    year = {2013},
    keywords = {Analyse des données, Big Data, COMPUTERANWENDUNGEN/SOZIAL- UND GEISTESWISSENSCHAFTEN, DATA MINING (MATHEMATISCHE STATISTIK), Data processing, Datenanalyse, Datenverarbeitung, Digital Humanities, Digital humanities, FORSCHUNGSNETZWERKE + INFORMATIONSNETZWERKE (BIBLIOTHEKSWESEN), History and criticism, Humanités numériques, INFORMATIONS- UND LITERATURRECHERCHEN (WISSENSCHAFTLICHE ARBEITSTECHNIK), LITERATURGESCHICHTE, Literature, Literaturgeschichte Fach, Littérature, Methodology, Méthodologie, Recherche, Research, Theory, etc},
}

A comparative study of machine learning methods for authorship attribution. Jockers, M. L.; and Witten, D. M. Literary and Linguistic Computing, 25(2): 215–223. June 2010.

@article{jockers_comparative_2010,
    title = {A comparative study of machine learning methods for authorship attribution},
    volume = {25},
    url = {http://llc.oxfordjournals.org/content/25/2/215.abstract},
    doi = {10.1093/llc/fqq001},
    abstract = {We compare and benchmark the performance of five classification methods, four of which are taken from the machine learning literature, in a classic authorship attribution problem involving the Federalist Papers. Cross-validation results are reported for each method, and each method is further employed in classifying the disputed papers and the few papers that are generally understood to be coauthored. These tests are performed using two separate feature sets: a “raw” feature set containing all words and word bigrams that are common to all of the authors, and a second “pre-processed” feature set derived by reducing the raw feature set to include only words meeting a minimum relative frequency threshold. Each of the methods tested performed well, but nearest shrunken centroids and regularized discriminant analysis had the best overall performances with 0/70 cross-validation errors.},
    language = {en},
    number = {2},
    urldate = {2011-12-14},
    journal = {Literary and Linguistic Computing},
    author = {Jockers, Matthew L. and Witten, Daniela M.},
    month = jun,
    year = {2010},
    keywords = {AnalyzeStatistically, bigdata, meta\_Theorizing, t\_MachineLearning, t\_Stylometry},
    pages = {215--223},
}
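The benchmarking setup of Jockers and Witten's abstract (several classifiers cross-validated on word and word-bigram frequencies) translates naturally into scikit-learn, where NearestCentroid with a shrink_threshold implements nearest shrunken centroids. In this sketch the feature matrix and labels are random placeholders standing in for the Federalist Papers data:

    import numpy as np
    from sklearn.model_selection import cross_val_score
    from sklearn.neighbors import NearestCentroid
    from sklearn.svm import LinearSVC

    # Placeholder data: 70 papers x 100 relative frequencies of common words
    # and word bigrams, with binary author labels; random so the snippet runs.
    rng = np.random.default_rng(0)
    X = rng.random((70, 100))
    y = rng.integers(0, 2, 70)

    classifiers = {
        "nearest shrunken centroids": NearestCentroid(shrink_threshold=0.2),
        "linear SVM": LinearSVC(dual=False),
    }
    for name, clf in classifiers.items():
        scores = cross_val_score(clf, X, y, cv=5)
        print(f"{name}: mean CV accuracy {scores.mean():.2f}")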
Jussen, B. (1)

Der eHumanities Desktop als Werkzeug in der historischen Semantik: Funktionsspektren und Einsatzszenarien. Mehler, A.; Schwandt, S.; Gleim, R.; and Jussen, B. Journal for Language Technology and Computational Linguistics (JCLC), 26(1). 2011. (Full record above under Gleim, R.)
Kapitan, K. (1)

Forgotten books: The application of unseen species models to the survival of culture. Kestemont, M.; Karsdorp, F.; de Bruijn, E.; Driscoll, M.; Kapitan, K. A.; Ó Macháin, P.; Sawyer, D.; Sleiderink, R.; and Chao, A. Science, 375(6582): 765–769. February 2022. (Full record above under Chao, A.)
\n  \n Karsdorp, F.\n \n \n (2)\n \n \n
\n
\n \n \n
What Shall We Do With the Unseen Sailor? Estimating the Size of the Dutch East India Company Using an Unseen Species Model.
Wevers, M.; Karsdorp, F.; and van Lottum, J.
In CEUR Workshop Proceedings, Antwerp, 2022.

@inproceedings{wevers_what_2022,
  address = {Antwerp},
  title = {What {Shall} {We} {Do} {With} the {Unseen} {Sailor}? {Estimating} the {Size} of the {Dutch} {East} {India} {Company} {Using} an {Unseen} {Species} {Model}},
  url = {https://ceur-ws.org/Vol-3290/short_paper1793.pdf},
  booktitle = {{CEUR} {Workshop} {Proceedings}},
  author = {Wevers, Melvin and Karsdorp, Folgert and van Lottum, Jelle},
  year = {2022},
}

Forgotten books: The application of unseen species models to the survival of culture. Kestemont, M.; Karsdorp, F.; de Bruijn, E.; Driscoll, M.; Kapitan, K. A.; Ó Macháin, P.; Sawyer, D.; Sleiderink, R.; and Chao, A. Science, 375(6582): 765–769. February 2022. (Full entry under Kapitan, K.)

Kestemont, M. (1)

Forgotten books: The application of unseen species models to the survival of culture. Kestemont, M.; Karsdorp, F.; de Bruijn, E.; Driscoll, M.; Kapitan, K. A.; Ó Macháin, P.; Sawyer, D.; Sleiderink, R.; and Chao, A. Science, 375(6582): 765–769. February 2022. (Full entry under Kapitan, K.)

Kirk, J. (1)

Word Frequency: Use or Misuse?
Kirk, J. M.
In Archer, D., editor, What's in a Word-list? Investigating word frequency and keyword extraction, Digital research in the arts and humanities, pages 17–33. Ashgate, Farnham, 2009.

@incollection{kirk_word_2009,
  address = {Farnham},
  series = {Digital research in the arts and humanities},
  title = {Word {Frequency}: {Use} or {Misuse}?},
  isbn = {978-0-7546-7240-1},
  shorttitle = {Frequency},
  number = {3},
  booktitle = {What's in a {Word}-list? {Investigating} word frequency and keyword extraction},
  publisher = {Ashgate},
  author = {Kirk, John M.},
  editor = {Archer, Dawn},
  year = {2009},
  pages = {17--33},
}

Manovich, L. (1)

Cultural analytics.
Manovich, L.
The MIT Press, Cambridge, Massachusetts, 2020.

@book{manovich_cultural_2020,
  address = {Cambridge, Massachusetts},
  title = {Cultural analytics},
  isbn = {978-0-262-03710-5},
  abstract = {How can we see a billion images? What analytical methods can we bring to bear on the astonishing scale of digital culture—the terabytes of photographs shared on social media every day, the hundreds of millions of songs created by twenty million musicians on SoundCloud, the content of four billion Pinterest boards? In Cultural Analytics, Lev Manovich presents concepts and methods for computational analysis of cultural data, with a particular focus on visual media. Drawing on more than a decade of research and projects from his own lab, Manovich—the founder of the field of cultural analytics—offers a gentle, nontechnical introduction to selected key concepts of data science and discusses the ways that our society uses data and algorithms. Manovich offers examples of computational cultural analysis and discusses the shift from “new media” to “more media”; explains how to turn cultural processes into computational data; and introduces concepts for exploring cultural datasets using data visualization as well as other recently developed methods for analyzing image and video datasets. He considers both the possibilities and the limitations of computational methods, and how using them challenges our existing ideas about culture and how to study it. Cultural Analytics is a book of media theory. Arguing that before we can theorize digital culture, we need to see it, and that, because of its scale, to see it we need computers, Manovich provides scholars with practical tools for studying contemporary media.},
  language = {eng},
  publisher = {The MIT Press},
  author = {Manovich, Lev},
  year = {2020},
  keywords = {Algorithmes, Analyse des données, DIGITALISIERUNG (SOZIOLOGIE), Data visualisation, Datenanalyse, Datenverarbeitung, Digital Humanities, Données massives, EINUNDZWANZIGSTES JAHRHUNDERT N. CHR, GEISTESWISSENSCHAFTEN + KÜNSTE (WISSENSCHAFTSTHEORIE), Geschichte 2005-2020, Humanités numériques, INFORMATIK UND GESELLSCHAFT, KULTURGESCHICHTE, KUNSTTHEORIE + KUNSTÄSTHETIK + KUNSTPHILOSOPHIE, Kultur, Massenkommunikation, Medienforschung, Médias sociaux, Méthodologie, Recherche, Sciences sociales, Statistik, Visualisierung},
}

McCarty, W. (1)

Modeling: A Study in Words and Meanings.
McCarty, W.
In Schreibman, S.; Siemens, R.; and Unsworth, J., editors, A Companion to Digital Humanities, chapter 19. Blackwell, Oxford, online edition, 2004.

@incollection{mccarty_modeling_2004,
  address = {Oxford},
  edition = {Online Edition},
  title = {Modeling: {A} {Study} in {Words} and {Meanings}},
  url = {http://www.digitalhumanities.org/companion/},
  abstract = {The question of modeling arises naturally for humanities computing from the prior question of what its practitioners across the disciplines have in common. What are they all doing with their computers that we might find in their diverse activities indications of a coherent or cohesible practice? How do we make the best, most productive sense of what we observe? There are, of course, many answers: practice varies from person to person, from project to project, and ways of construing it perhaps vary even more. In this chapter I argue for modeling as a model of such a practice. I have three confluent goals: to identify humanities computing with an intellectual ground shared by the older disciplines, so that we may say how and to what extent our field is of as well as in the humanities, how it draws from and adds to them; at the same time to reflect experience with computers "in the wild"; and to aim at the most challenging problems, and so the most intellectually rewarding future now imaginable.},
  language = {en},
  booktitle = {A {Companion} to {Digital} {Humanities}},
  publisher = {Blackwell},
  author = {McCarty, Willard},
  editor = {Schreibman, Susan and Siemens, Ray and Unsworth, John},
  year = {2004},
  keywords = {act\_Modeling, goal\_Analysis, goal\_Interpretation, meta\_Theorizing, obj\_AnyObject},
  pages = {(chapter 19)},
}

Mehler, A. (1)

Der eHumanities Desktop als Werkzeug in der historischen Semantik: Funktionsspektren und Einsatzszenarien [The eHumanities Desktop as a tool in historical semantics: ranges of functions and deployment scenarios].
Mehler, A.; Schwandt, S.; Gleim, R.; and Jussen, B.
Journal for Language Technology and Computational Linguistics (JLCL), 26(1). 2011.

@article{mehler_ehumanities_2011,
  title = {Der {eHumanities} {Desktop} als {Werkzeug} in der historischen {Semantik}: {Funktionsspektren} und {Einsatzszenarien}},
  volume = {26},
  issn = {2190-6858},
  shorttitle = {Der {eHumanities} {Desktop} als {Werkzeug} in der historischen {Semantik}},
  url = {https://pub.uni-bielefeld.de/record/2902769},
  language = {ger},
  number = {1},
  urldate = {2021-08-26},
  journal = {Journal for Language Technology and Computational Linguistics (JLCL)},
  author = {Mehler, Alexander and Schwandt, Silke and Gleim, Rüdiger and Jussen, Bernhard},
  year = {2011},
}

Moretti, F. (1)

Distant reading.
Moretti, F.
Verso, London, 2013.

@book{moretti_distant_2013,
  address = {London},
  title = {Distant reading},
  isbn = {978-1-78168-084-1},
  publisher = {Verso},
  author = {Moretti, Franco},
  year = {2013},
}

Neudecker, C. (1)

A survey of OCR evaluation tools and metrics.
Neudecker, C.; Baierer, K.; Gerber, M.; Clausner, C.; Antonacopoulos, A.; and Pletschacher, S.
In The 6th International Workshop on Historical Document Imaging and Processing, pages 13–18. Association for Computing Machinery, New York, NY, USA, September 2021. (The source record swapped the last three authors' given and family names; corrected here.)

@inproceedings{neudecker_survey_2021,
  address = {New York, NY, USA},
  title = {A survey of {OCR} evaluation tools and metrics},
  isbn = {978-1-4503-8690-6},
  url = {https://doi.org/10.1145/3476887.3476888},
  abstract = {The millions of pages of historical documents that are digitized in libraries are increasingly used in contexts that have more specific requirements for OCR quality than keyword search. How to comprehensively, efficiently and reliably assess the quality of OCR results against the background of mass digitization, when ground truth can only ever be produced for very small numbers? Due to gaps in specifications, results from OCR evaluation tools can return different results, and due to differences in implementation, even commonly used error rates are often not directly comparable. OCR evaluation metrics and sampling methods are also not sufficient where they do not take into account the accuracy of layout analysis, since for advanced use cases like Natural Language Processing or the Digital Humanities, accurate layout analysis and detection of the reading order are crucial. We provide an overview of OCR evaluation metrics and tools, describe two advanced use cases for OCR results, and perform an OCR evaluation experiment with multiple evaluation tools and different metrics for two distinct datasets. We analyze the differences and commonalities in light of the presented use cases and suggest areas for future work.},
  urldate = {2022-02-03},
  booktitle = {The 6th {International} {Workshop} on {Historical} {Document} {Imaging} and {Processing}},
  publisher = {Association for Computing Machinery},
  author = {Neudecker, Clemens and Baierer, Konstantin and Gerber, Mike and Clausner, Christian and Antonacopoulos, Apostolos and Pletschacher, Stefan},
  month = sep,
  year = {2021},
  keywords = {accuracy, evaluation, metrics, optical character recognition},
  pages = {13--18},
}

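The error rates the survey compares are, at bottom, normalized edit distances; the character error rate (CER) divides the Levenshtein distance between OCR output and ground truth by the length of the ground truth. The abstract's point that implementations disagree shows up precisely in choices like Unicode normalization and tokenization. A minimal sketch of the basic computation (plain Python; the example strings are invented):

```python
def levenshtein(a: str, b: str) -> int:
    """Edit distance (insertions, deletions, substitutions) between a and b."""
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        curr = [i]
        for j, cb in enumerate(b, 1):
            curr.append(min(prev[j] + 1,                 # deletion
                            curr[j - 1] + 1,             # insertion
                            prev[j - 1] + (ca != cb)))   # substitution
        prev = curr
    return prev[-1]

def cer(ground_truth: str, ocr_output: str) -> float:
    """Character error rate: edit distance normalized by reference length."""
    return levenshtein(ground_truth, ocr_output) / max(len(ground_truth), 1)

print(cer("historical documents", "historicaI d0cuments"))  # 2 edits / 20 chars = 0.1
```

Word error rate is the same computation over word tokens rather than characters, which is one reason the two metrics can rank systems differently.
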
Petz, C. (1)

A (not so) short history of historical network research – part 1.
Petz, C.
Digital Humanities Lab (blog), July 2022.

@misc{petz_not_2022,
  type = {Billet},
  title = {A (not so) short history of historical network research – part 1},
  url = {https://dhlab.hypotheses.org/3126},
  abstract = {Buckle up for a dive into the history of historical network research! In this three-part series, I will recount the history of computational thought in historical research, discuss networks for history, and highlight some of the cornerstones of network methodologies in historical studies. Prepare for part 1 on computational thought in …},
  language = {de-DE},
  urldate = {2023-03-13},
  journal = {Digital Humanities Lab},
  author = {Petz, Cindarella},
  month = jul,
  year = {2022},
}

Pielström, S. (1)

Topic Modeling with Interactive Visualizations in a GUI Tool.
Simmler, S.; Vitt, T.; and Pielström, S.
In Proceedings of the Digital Humanities Conference, Utrecht, 2019.

@inproceedings{simmler_topic_2019,
  address = {Utrecht},
  title = {Topic {Modeling} with {Interactive} {Visualizations} in a {GUI} {Tool}},
  url = {https://dev.clariah.nl/files/dh2019/boa/0637.html},
  booktitle = {Proceedings of the {Digital} {Humanities} {Conference}},
  author = {Simmler, Severin and Vitt, Thorsten and Pielström, Steffen},
  year = {2019},
}

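Neither this entry nor Schmidt's critique below spells out the model itself: topic modeling (typically latent Dirichlet allocation) factors a document-term count matrix into per-document topic mixtures and per-topic word distributions. A minimal sketch with scikit-learn (toy corpus invented for illustration; the GUI tool described in the entry is aimed at this kind of workflow, though its internals are not specified here):

```python
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

# Invented toy corpus; real applications use hundreds of documents or more.
docs = [
    "ship sailed the sea with a crew",
    "the crew of the ship at sea",
    "the manuscript was copied by a scribe",
    "a scribe copied the medieval manuscript",
]

vec = CountVectorizer(stop_words="english")
counts = vec.fit_transform(docs)

# Two latent topics; each topic is a distribution over the vocabulary.
lda = LatentDirichletAllocation(n_components=2, random_state=0)
lda.fit(counts)

terms = vec.get_feature_names_out()
for k, weights in enumerate(lda.components_):
    top = weights.argsort()[::-1][:3]
    print(f"topic {k}:", ", ".join(terms[i] for i in top))
```

Inspecting the top-weighted words per topic, as the loop does, is exactly the step that interactive visualizations make easier and that Schmidt argues needs critical scrutiny.
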
Pinche, A. (1)

Generic HTR Models for Medieval Manuscripts. The CREMMALab Project.
Pinche, A.
Journal of Data Mining & Digital Humanities, Historical Documents and.... October 2023.

@article{pinche2023a,
  title = {Generic {HTR} {Models} for {Medieval} {Manuscripts}. {The} {CREMMALab} {Project}},
  volume = {Historical Documents and...},
  issn = {2416-5999},
  url = {https://jdmdh.episciences.org/10252},
  doi = {10.46298/jdmdh.10252},
  abstract = {In the Humanities, the emergence of digital methods has opened up research to quantitative analysis and/or to publication of large corpora. To produce more textual data faster, automatic text recognition technology (ATR) is increasingly involved in research projects following precursors such as the Himanis project. However, many research teams have limited resources, either financially or in terms of their expertise in artificial intelligence. It may therefore be difficult to integrate ATR into their project pipeline if they need to train a model or to create data from scratch. The goal here is not to explain how to build or improve a new ATR engine, nor to find a way to automatically align a pre-existing corpus with an image to quickly create ground truths for training. This paper aims to help humanists develop models for medieval manuscripts, create and gather training data by knowing the issues underlying their choices. The objective is also to show the importance of data consistency as a prerequisite for building homogeneous corpora and training more accurate models. We will present an overview of our work and experiment in the CREMMALab project (2021-2022), showing first how we ensure the consistency of the data and then how we have developed a generic model for medieval French manuscripts from the 13th to the 15th century, ready to be shared (more than 94\% accuracy) and/or fine-tuned by other projects.},
  language = {en},
  urldate = {2024-01-03},
  journal = {Journal of Data Mining \& Digital Humanities},
  author = {Pinche, Ariane},
  month = oct,
  year = {2023},
}

Potter, R. (1)

Statistical Analysis of Literature. A Retrospective on Computers and the Humanities, 1966-1990.
Potter, R. G.
Computers and the Humanities, 25: 401–429. 1991.

@article{potter_statistical_1991,
  title = {Statistical {Analysis} of {Literature}. {A} {Retrospective} on {Computers} and the {Humanities}, 1966-1990},
  volume = {25},
  url = {http://link.springer.com/article/10.1007%2FBF00141190},
  abstract = {This retrospective on statistical analysis of literature in the first twenty-four years of Computers and the Humanities divides the essays under review into four groups: the philosophical, the statistical analyses of language, the statistical analyses of literary texts, and the statistical analyses of themes. It begins with the question: must valid statistical analysis of any literary text be based on a complete linguistic description of the language of the text? It summarizes and evaluates over forty essays, giving details on works discussed, sample sizes used, statistical methods applied, and quotations from the researchers. The essay ends with a polemical summary of what has been done and what the future holds. It emphasizes the importance of extended pre-computational stages of learning about language and discourse analysis; reading previous research, building on and challenging theory; and the use of carefully crafted, small databases to test specific questions.},
  language = {en},
  journal = {Computers and the Humanities},
  author = {Potter, Rosanne G.},
  year = {1991},
  keywords = {AnalyzeStatistically, act\_StylisticAnalysis, meta\_GiveOverview, obj\_Literature, t\_Stylometry},
  pages = {401--429},
}

Rehbein, M. (1)

Categorising Legal Records – Deductive, Pragmatic, and Computational Strategies.
Ernst, M.; Gassner, S.; Gerstmeier, M.; and Rehbein, M.
Digital Humanities Quarterly, 17(3). July 2023.

@article{ernst_categorising_2023,
  title = {Categorising {Legal} {Records} – {Deductive}, {Pragmatic}, and {Computational} {Strategies}},
  volume = {017},
  issn = {1938-4122},
  number = {3},
  journal = {Digital Humanities Quarterly},
  author = {Ernst, Marlene and Gassner, Sebastian and Gerstmeier, Markus and Rehbein, Malte},
  month = jul,
  year = {2023},
}

Romanello, M. (1)

Exploring Citation Networks to Study Intertextuality in Classics.
Romanello, M.
Digital Humanities Quarterly, 10(2). June 2016.

@article{romanello_exploring_2016,
  title = {Exploring {Citation} {Networks} to {Study} {Intertextuality} in {Classics}},
  volume = {010},
  issn = {1938-4122},
  number = {2},
  journal = {Digital Humanities Quarterly},
  author = {Romanello, Matteo},
  month = jun,
  year = {2016},
}

Romanov, M. (1)

Algorithmic Analysis of Medieval Arabic Biographical Collections.
Romanov, M.
Speculum, 92(S1): S226–S246. October 2017.

@article{romanov_algorithmic_2017,
  title = {Algorithmic {Analysis} of {Medieval} {Arabic} {Biographical} {Collections}},
  volume = {92},
  issn = {0038-7134},
  url = {http://www.journals.uchicago.edu/doi/abs/10.1086/693970},
  doi = {10.1086/693970},
  number = {S1},
  urldate = {2017-10-19},
  journal = {Speculum},
  author = {Romanov, Maxim},
  month = oct,
  year = {2017},
  pages = {S226--S246},
}

Sawyer, D. (1)

Forgotten books: The application of unseen species models to the survival of culture. Kestemont, M.; Karsdorp, F.; de Bruijn, E.; Driscoll, M.; Kapitan, K. A.; Ó Macháin, P.; Sawyer, D.; Sleiderink, R.; and Chao, A. Science, 375(6582): 765–769. February 2022. (Full entry under Kapitan, K.)

Schmidt, B. (1)

Words Alone: Dismantling Topic Models in the Humanities.
Schmidt, B.
Journal of Digital Humanities, 2(1). 2013.

@article{schmidt_words_2013,
  title = {Words {Alone}: {Dismantling} {Topic} {Models} in the {Humanities}},
  volume = {2},
  url = {http://journalofdigitalhumanities.org/2-1/words-alone-by-benjamin-m-schmidt/},
  number = {1},
  urldate = {2020-05-25},
  journal = {Journal of Digital Humanities},
  author = {Schmidt, Ben},
  year = {2013},
}

Schwandt, S. (1)

Der eHumanities Desktop als Werkzeug in der historischen Semantik: Funktionsspektren und Einsatzszenarien. Mehler, A.; Schwandt, S.; Gleim, R.; and Jussen, B. Journal for Language Technology and Computational Linguistics (JLCL), 26(1). 2011. (Full entry under Mehler, A.)

Simmler, S. (1)

Topic Modeling with Interactive Visualizations in a GUI Tool. Simmler, S.; Vitt, T.; and Pielström, S. In Proceedings of the Digital Humanities Conference, Utrecht, 2019. (Full entry under Pielström, S.)

Sleiderink, R. (1)

Forgotten books: The application of unseen species models to the survival of culture. Kestemont, M.; Karsdorp, F.; de Bruijn, E.; Driscoll, M.; Kapitan, K. A.; Ó Macháin, P.; Sawyer, D.; Sleiderink, R.; and Chao, A. Science, 375(6582): 765–769. February 2022. (Full entry under Kapitan, K.)

Pletschacher, S. (1)

A survey of OCR evaluation tools and metrics. Neudecker, C.; Baierer, K.; Gerber, M.; Clausner, C.; Antonacopoulos, A.; and Pletschacher, S. In The 6th International Workshop on Historical Document Imaging and Processing, pages 13–18. Association for Computing Machinery, New York, NY, USA, September 2021. (Full entry under Neudecker, C.)

Stutzmann, D. (1)

Liste d’abréviations latines pour l’encodage XML [List of Latin abbreviations for XML encoding].
Stutzmann, D.
Écriture médiévale & numérique (blog), May 2016.

@misc{stutzmann_liste_2016,
  type = {Billet},
  title = {Liste d’abréviations latines pour l’encodage {XML}},
  url = {https://oriflamms.hypotheses.org/1471},
  abstract = {La perception, l’analyse et l’encodage des abréviations est un sujet complexe, sur lequel le projet Oriflamms a déjà permis de se consacrer (cf. D. Stutzmann, « Ontologie des formes et encodage des textes manuscrits médiévaux. Le projet ORIFLAMMS », Document numérique, 16/3, 2013, p. 81-95, aux pages 86-89, accessible en ligne : https://www.cairn.info/resume.php?ID\_ARTICLE=DN\_163\_0081). Pour faire avancer …},
  language = {fr-FR},
  urldate = {2022-01-11},
  journal = {Écriture médiévale \& numérique},
  author = {Stutzmann, Dominique},
  month = may,
  year = {2016},
}

Abstract (translated): The perception, analysis and encoding of abbreviations is a complex subject, one the Oriflamms project has already addressed (cf. D. Stutzmann, « Ontologie des formes et encodage des textes manuscrits médiévaux. Le projet ORIFLAMMS », Document numérique, 16/3, 2013, p. 81-95, at pages 86-89, available online: https://www.cairn.info/resume.php?ID_ARTICLE=DN_163_0081). To take this further …

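The abbreviation list discussed above feeds XML encoding of manuscript text; in TEI, the customary pattern records an abbreviated form and its expansion side by side inside a choice element. A minimal sketch generating that pattern (Python; the example pair "dns"/"dominus" is a standard Latin abbreviation chosen here for illustration, not taken from the post's list):

```python
import xml.etree.ElementTree as ET

def encode_abbreviation(abbr: str, expan: str) -> ET.Element:
    """Wrap an abbreviated form and its expansion in a TEI <choice> element."""
    choice = ET.Element("choice")
    ET.SubElement(choice, "abbr").text = abbr
    ET.SubElement(choice, "expan").text = expan
    return choice

# 'dns' is a common medieval Latin abbreviation for 'dominus'.
elem = encode_abbreviation("dns", "dominus")
print(ET.tostring(elem, encoding="unicode"))
# <choice><abbr>dns</abbr><expan>dominus</expan></choice>
```

An encoding pipeline can then substitute entries from an abbreviation list such as the one the post announces for the hard-coded pair used here.
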
Underwood, T. (1)

Distant horizons: digital evidence and literary change.
Underwood, T.
The University of Chicago Press, Chicago, 2019.

@book{underwood_distant_2019,
  address = {Chicago},
  title = {Distant horizons: digital evidence and literary change},
  isbn = {9780226612836},
  language = {eng},
  publisher = {The University of Chicago Press},
  author = {Underwood, Ted},
  year = {2019},
  keywords = {Digital Humanities, Digital humanities, Literature, Literaturwissenschaft, Methodology, Research},
}

Vitt, T. (1)

Topic Modeling with Interactive Visualizations in a GUI Tool. Simmler, S.; Vitt, T.; and Pielström, S. In Proceedings of the Digital Humanities Conference, Utrecht, 2019. (Full entry under Pielström, S.)

Wevers, M. (1)

What Shall We Do With the Unseen Sailor? Estimating the Size of the Dutch East India Company Using an Unseen Species Model. Wevers, M.; Karsdorp, F.; and van Lottum, J. In CEUR Workshop Proceedings, Antwerp, 2022. (Full entry under Karsdorp, F.)

Witten, D. (1)

A comparative study of machine learning methods for authorship attribution.
Jockers, M. L.; and Witten, D. M.
Literary and Linguistic Computing, 25(2): 215–223. June 2010.

@article{jockers_comparative_2010,
  title = {A comparative study of machine learning methods for authorship attribution},
  volume = {25},
  url = {http://llc.oxfordjournals.org/content/25/2/215.abstract},
  doi = {10.1093/llc/fqq001},
  abstract = {We compare and benchmark the performance of five classification methods, four of which are taken from the machine learning literature, in a classic authorship attribution problem involving the Federalist Papers. Cross-validation results are reported for each method, and each method is further employed in classifying the disputed papers and the few papers that are generally understood to be coauthored. These tests are performed using two separate feature sets: a “raw” feature set containing all words and word bigrams that are common to all of the authors, and a second “pre-processed” feature set derived by reducing the raw feature set to include only words meeting a minimum relative frequency threshold. Each of the methods tested performed well, but nearest shrunken centroids and regularized discriminant analysis had the best overall performances with 0/70 cross-validation errors.},
  language = {en},
  number = {2},
  urldate = {2011-12-14},
  journal = {Literary and Linguistic Computing},
  author = {Jockers, Matthew L. and Witten, Daniela M.},
  month = jun,
  year = {2010},
  keywords = {AnalyzeStatistically, bigdata, meta\_Theorizing, t\_MachineLearning, t\_Stylometry},
  pages = {215--223},
}

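The classifiers compared above operate on relative frequencies of common words and bigrams; the best performers were nearest shrunken centroids and regularized discriminant analysis. A minimal sketch of the underlying nearest-centroid idea, without the shrinkage step (plain Python with NumPy; the toy texts and the test sentence are invented, not the paper's data):

```python
import numpy as np
from collections import Counter

def rel_freqs(text, vocab):
    """Relative frequency of each vocabulary word in a whitespace-tokenized text."""
    counts = Counter(text.split())
    total = sum(counts.values()) or 1
    return np.array([counts[w] / total for w in vocab])

# Invented toy corpus standing in for texts of known authorship.
train = {
    "hamilton": ["upon the whole it is evident", "it is evident upon reflection"],
    "madison": ["the people must decide for themselves", "the people themselves must judge"],
}
vocab = sorted({w for docs in train.values() for d in docs for w in d.split()})

# One centroid of relative word frequencies per author.
centroids = {a: np.mean([rel_freqs(d, vocab) for d in docs], axis=0)
             for a, docs in train.items()}

def attribute(text):
    """Assign a text to the author whose centroid is nearest in Euclidean distance."""
    v = rel_freqs(text, vocab)
    return min(centroids, key=lambda a: np.linalg.norm(v - centroids[a]))

print(attribute("it is evident the people must judge"))
```

Shrinkage additionally pulls each class centroid toward the overall centroid, zeroing out words that do not discriminate between authors; scikit-learn exposes this as NearestCentroid(shrink_threshold=...).
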
de Bruijn, E. (1)

Forgotten books: The application of unseen species models to the survival of culture. Kestemont, M.; Karsdorp, F.; de Bruijn, E.; Driscoll, M.; Kapitan, K. A.; Ó Macháin, P.; Sawyer, D.; Sleiderink, R.; and Chao, A. Science, 375(6582): 765–769. February 2022. (Full entry under Kapitan, K.)

van Lottum, J. (1)

What Shall We Do With the Unseen Sailor? Estimating the Size of the Dutch East India Company Using an Unseen Species Model. Wevers, M.; Karsdorp, F.; and van Lottum, J. In CEUR Workshop Proceedings, Antwerp, 2022. (Full entry under Karsdorp, F.)

Ó Macháin, P. (1)

Forgotten books: The application of unseen species models to the survival of culture. Kestemont, M.; Karsdorp, F.; de Bruijn, E.; Driscoll, M.; Kapitan, K. A.; Ó Macháin, P.; Sawyer, D.; Sleiderink, R.; and Chao, A. Science, 375(6582): 765–769. February 2022. (Full entry under Kapitan, K.)
