@PhilosTEI: Building Corpora for Philosophers [in press]. Betti, A., Reynaert, M., & van den Berg, H. In Odijk, J. & van Hessen, A., editors, CLNBK16. Ubiquity Press, London, 2016. abstract bibtex The step to e-research in philosophy depends on the availability of high quality, easily and freely accessible corpora in a sustainable format composed from multi-language, multi-script books from different historical periods. Corpora matching these needs are at the moment virtually non-existing. Within @PhilosTEI, we have addressed this corpus building problem by developing an open source, web-based, user-friendly workflow from textual images to TEI, based on state-of-the-art open source OCR software, to wit Tesseract, and a multi-language version of TICCL, a powerful OCR post-correction tool. We have demonstrated the utility of the tool by applying it to a multilingual, multi-script corpus of important eighteenth to twentieth-century European philosophical texts.
% Book chapter by Betti, Reynaert and van den Berg in a volume edited by
% Odijk and van Hessen (Ubiquity Press, London, 2016).
% The "in press" publication status is carried by the note field, not the title.
% Only the project name is brace-protected in the title; the remaining words
% are left to the bibliography style's casing rules.
% NOTE(review): booktitle "CLNBK16" looks like a placeholder key, not the
% volume's real title -- confirm and replace with the full book title.
@incollection{betti_philostei:_2016,
  author    = {Betti, Arianna and Reynaert, Martin and van den Berg, Hein},
  title     = {{@PhilosTEI}: Building Corpora for Philosophers},
  editor    = {Odijk, Jan and van Hessen, Arjan},
  booktitle = {{CLNBK16}},
  publisher = {Ubiquity Press},
  address   = {London},
  year      = {2016},
  note      = {In press},
  abstract  = {The step to e-research in philosophy depends on the availability of high quality, easily and freely accessible corpora in a sustainable format composed from multi-language, multi-script books from different historical periods. Corpora matching these needs are at the moment virtually non-existing. Within @PhilosTEI, we have addressed this corpus building problem by developing an open source, web-based, user-friendly workflow from textual images to TEI, based on state-of-the-art open source OCR software, to wit Tesseract, and a multi-language version of TICCL, a powerful OCR post-correction tool. We have demonstrated the utility of the tool by applying it to a multilingual, multi-script corpus of important eighteenth to twentieth-century European philosophical texts.},
}
Downloads: 0
{"_id":"ZYAvMrD5bSjqq4KgP","bibbaseid":"betti-reynaert-vandenberg-philosteibuildingcorporaforphilosophersinpress-2016","downloads":0,"creationDate":"2016-09-22T13:52:36.406Z","title":"@PhilosTEI: Building Corpora for Philosophers [in press]","author_short":["Betti, A.","Reynaert, M.","van den Berg, H."],"year":2016,"bibtype":"incollection","biburl":"https://api.zotero.org/groups/902605/items?key=bi2Q7duoPuqjf6lgym4TgM83&format=bibtex&limit=100","bibdata":{"bibtype":"incollection","type":"incollection","address":"London","title":"@PhilosTEI: Building Corpora for Philosophers [in press]","abstract":"The step to e-research in philosophy depends on the availability of high quality, easily and freely accessible corpora in a sustainable format composed from multi-language, multi-script books from different historical periods. Corpora matching these needs are at the moment virtually non-existing. Within @PhilosTei, we have addressed this corpus building problem by developing an open source, web-based, user-friendly workflow from textual images to TEI, based on state-of-the-art open source OCR software, to wit Tesseract, and a multi-language version of TICCL, a powerful OCR post-correction tool. 
We have demonstrated the utility of the tool by applying it to a multilingual, multi-script corpus of important eighteenth to twentieth-century European philosophical texts.","booktitle":"CLNBK16","publisher":"Ubiquity Press","author":[{"propositions":[],"lastnames":["Betti"],"firstnames":["Arianna"],"suffixes":[]},{"propositions":[],"lastnames":["Reynaert"],"firstnames":["Martin"],"suffixes":[]},{"propositions":["van","den"],"lastnames":["Berg"],"firstnames":["Hein"],"suffixes":[]}],"editor":[{"propositions":[],"lastnames":["Odijk"],"firstnames":["Jan"],"suffixes":[]},{"propositions":["van"],"lastnames":["Hessen"],"firstnames":["Arjan"],"suffixes":[]}],"year":"2016","bibtex":"@incollection{betti_philostei:_2016,\n\taddress = {London},\n\ttitle = {@{PhilosTEI}: {Building} {Corpora} for {Philosophers} [in press]},\n\tabstract = {The step to e-research in philosophy depends on the availability of high quality, easily and freely accessible corpora in a sustainable format composed from multi-language, multi-script books from different historical periods. Corpora matching these needs are at the moment virtually non-existing. Within @PhilosTei, we have addressed this corpus building problem by developing an open source, web-based, user-friendly workflow from textual images to TEI, based on state-of-the-art open source OCR software, to wit Tesseract, and a multi-language version of TICCL, a powerful OCR post-correction tool. 
We have demonstrated the utility of the tool by applying it to a multilingual, multi-script corpus of important eighteenth to twentieth-century European philosophical texts.},\n\tbooktitle = {{CLNBK16}},\n\tpublisher = {Ubiquity Press},\n\tauthor = {Betti, Arianna and Reynaert, Martin and van den Berg, Hein},\n\teditor = {Odijk, Jan and van Hessen, Arjan},\n\tyear = {2016},\n}\n\n","author_short":["Betti, A.","Reynaert, M.","van den Berg, H."],"editor_short":["Odijk, J.","van Hessen, A."],"key":"betti_philostei:_2016","id":"betti_philostei:_2016","bibbaseid":"betti-reynaert-vandenberg-philosteibuildingcorporaforphilosophersinpress-2016","role":"author","urls":{},"metadata":{"authorlinks":{}},"downloads":0},"search_terms":["philostei","building","corpora","philosophers","press","betti","reynaert","van den berg"],"keywords":[],"authorIDs":["57e3e224f7ba57245f000035"],"dataSources":["27JiNQ95ZMxDzJmEN"]}