An Image Dataset of Text Patches in Everyday Scenes. Ibrahim, A., Abbott, A. L., & Hussein, M. E. In Bebis, G., Boyle, R., Parvin, B., Koracin, D., Porikli, F., Skaff, S., Entezari, A., Min, J., Iwai, D., Sadagic, A., Scheidegger, C., & Isenberg, T., editors, Advances in Visual Computing, Lecture Notes in Computer Science, pages 291–300, 2016. Springer International Publishing.
abstract   bibtex   
This paper describes a dataset containing small images of text from everyday scenes. The purpose of the dataset is to support the development of new automated systems that can detect and analyze text. Although much research has been devoted to text detection and recognition in scanned documents, relatively little attention has been given to text detection in other types of images, such as photographs that are posted on social-media sites. This new dataset, known as COCO-Text-Patch, contains approximately 354,000 small images that are each labeled as “text” or “non-text”. This dataset particularly addresses the problem of text verification, which is an essential stage in the end-to-end text detection and recognition pipeline. In order to evaluate the utility of this dataset, it has been used to train two deep convolution neural networks to distinguish text from non-text. One network is inspired by the GoogLeNet architecture, and the second one is based on CaffeNet. Accuracy levels of 90.2% and 90.9% were obtained using the two networks, respectively. All of the images, source code, and deep-learning trained models described in this paper will be publicly available (https://aicentral.github.io/coco-text-patch/).
@inproceedings{ibrahim_image_2016,
  author    = {Ibrahim, Ahmed and Abbott, A. Lynn and Hussein, Mohamed E.},
  title     = {An Image Dataset of Text Patches in Everyday Scenes},
  editor    = {Bebis, George and Boyle, Richard and Parvin, Bahram and Koracin, Darko and Porikli, Fatih and Skaff, Sandra and Entezari, Alireza and Min, Jianyuan and Iwai, Daisuke and Sadagic, Amela and Scheidegger, Carlos and Isenberg, Tobias},
  booktitle = {Advances in Visual Computing},
  series    = {Lecture Notes in Computer Science},
  pages     = {291--300},
  publisher = {Springer International Publishing},
  year      = {2016},
  isbn      = {978-3-319-50832-0},
  langid    = {english},
  keywords  = {Convolution Neural Network, Convolutional Neural Network, Optical Character Recognition, Region Proposal, Text Detection},
  rights    = {All rights reserved},
  abstract  = {This paper describes a dataset containing small images of text from everyday scenes. The purpose of the dataset is to support the development of new automated systems that can detect and analyze text. Although much research has been devoted to text detection and recognition in scanned documents, relatively little attention has been given to text detection in other types of images, such as photographs that are posted on social-media sites. This new dataset, known as {COCO}-Text-Patch, contains approximately 354,000 small images that are each labeled as “text” or “non-text”. This dataset particularly addresses the problem of text verification, which is an essential stage in the end-to-end text detection and recognition pipeline. In order to evaluate the utility of this dataset, it has been used to train two deep convolution neural networks to distinguish text from non-text. One network is inspired by the {GoogLeNet} architecture, and the second one is based on {CaffeNet}. Accuracy levels of 90.2\% and 90.9\% were obtained using the two networks, respectively. All of the images, source code, and deep-learning trained models described in this paper will be publicly available (https://aicentral.github.io/coco-text-patch/).},
}

Downloads: 0