@PhilosTEI: Building Corpora for Philosophers. Betti, A., Reynaert, M., & van den Berg, H. In Odijk, J. & van Hessen, A., editors, CLARIN in the Low Countries, pages 371–384. Ubiquity Press, London, 2017.
@PhilosTEI: Building Corpora for Philosophers [link] Paper abstract bibtex
The step to e-research in philosophy depends on the availability of high quality, easily and freely accessible corpora in a sustainable format composed from multi-language, multi-script books from different historical periods. Corpora matching these needs are at the moment virtually non-existing. Within @PhilosTei, we have addressed this corpus building problem by developing an open source, web-based, user-friendly workflow from textual images to TEI, based on state-of-the-art open source OCR software, to wit Tesseract, and a multi-language version of TICCL, a powerful OCR post-correction tool. We have demonstrated the utility of the tool by applying it to a multilingual, multi-script corpus of important eighteenth to twentieth-century European philosophical texts.
@comment{Chapter in the edited volume "CLARIN in the Low Countries". The doi field holds the bare DOI; url is the matching resolver link. Only proper names and acronyms are brace-protected in title/booktitle so that styles may apply their own casing.}
@incollection{betti_philostei_2017,
	author = {Betti, Arianna and Reynaert, Martin and van den Berg, Hein},
	editor = {Odijk, Jan and van Hessen, Arjan},
	title = {{@PhilosTEI}: Building Corpora for Philosophers},
	booktitle = {{CLARIN} in the {Low Countries}},
	publisher = {Ubiquity Press},
	address = {London},
	year = {2017},
	pages = {371--384},
	doi = {10.5334/bbi.32},
	url = {https://doi.org/10.5334/bbi.32},
	abstract = {The step to e-research in philosophy depends on the availability of high quality, easily and freely accessible corpora in a sustainable format composed from multi-language, multi-script books from different historical periods. Corpora matching these needs are at the moment virtually non-existing. Within @PhilosTei, we have addressed this corpus building problem by developing an open source, web-based, user-friendly workflow from textual images to TEI, based on state-of-the-art open source OCR software, to wit Tesseract, and a multi-language version of TICCL, a powerful OCR post-correction tool. We have demonstrated the utility of the tool by applying it to a multilingual, multi-script corpus of important eighteenth to twentieth-century European philosophical texts.},
}

Downloads: 0