@PhilosTEI: Building Corpora for Philosophers. Betti, A., Reynaert, M., & van den Berg, H. In Odijk, J. & van Hessen, A., editors, CLARIN in the Low Countries, pages 379–392. Ubiquity Press, London, 2017.
abstract   bibtex   
The step to e-research in philosophy depends on the availability of high quality, easily and freely accessible corpora in a sustainable format composed from multi-language, multi-script books from different historical periods. Corpora matching these needs are at the moment virtually non-existing. Within @PhilosTEI, we have addressed this corpus building problem by developing an open source, web-based, user-friendly workflow from textual images to TEI, based on state-of-the-art open source OCR software, to wit Tesseract, and a multi-language version of TICCL, a powerful OCR post-correction tool. We have demonstrated the utility of the tool by applying it to a multilingual, multi-script corpus of important eighteenth to twentieth-century European philosophical texts.
@comment{Chapter in the edited volume "CLARIN in the Low Countries" (Ubiquity Press, 2017). Only words that must keep their case are brace-protected; the citation key is kept as exported so existing citations still resolve.}
@incollection{betti_philostei:_2017,
	author    = {Betti, Arianna and Reynaert, Martin and van den Berg, Hein},
	title     = {{@PhilosTEI}: Building Corpora for Philosophers},
	editor    = {Odijk, Jan and van Hessen, Arjan},
	booktitle = {{CLARIN} in the {Low} {Countries}},
	publisher = {Ubiquity Press},
	address   = {London},
	year      = {2017},
	pages     = {379--392},
	abstract  = {The step to e-research in philosophy depends on the availability of high quality, easily and freely accessible corpora in a sustainable format composed from multi-language, multi-script books from different historical periods. Corpora matching these needs are at the moment virtually non-existing. Within @PhilosTEI, we have addressed this corpus building problem by developing an open source, web-based, user-friendly workflow from textual images to TEI, based on state-of-the-art open source OCR software, to wit Tesseract, and a multi-language version of TICCL, a powerful OCR post-correction tool. We have demonstrated the utility of the tool by applying it to a multilingual, multi-script corpus of important eighteenth to twentieth-century European philosophical texts.},
}

Downloads: 0