Building readability lexicons with unannotated corpora. Brooke, J., Tsang, V., Jacob, D., Shein, F., & Hirst, G. In Proceedings, Workshop on Predicting and Improving Text Readability for Target Reader Populations, Montreal, 2012. [The poster] [abstract] [bibtex] Lexicons of word difficulty are useful for various educational applications, including readability classification and text simplification. In this work, we explore automatic creation of these lexicons using methods which go beyond simple term frequency, but without relying on age-graded texts. In particular, we derive information for each word type from the readability of the web documents they appear in and the words they co-occur with, linearly combining these various features. We show the efficacy of this approach by comparing our lexicon with an existing coarse-grained, low-coverage resource and a new crowdsourced annotation.
@comment{brooke7: workshop paper on building readability lexicons from
  unannotated corpora. "download" is not a standard BibTeX field; it is
  kept next to the standard "url" field for any tooling that relies on
  it. The poster link is given as plain text in "note" so that it
  typesets correctly under LaTeX.}
@inproceedings{brooke7,
  author    = {Brooke, Julian and Tsang, Vivian and Jacob, David and
               Shein, Fraser and Hirst, Graeme},
  title     = {Building readability lexicons with unannotated corpora},
  booktitle = {Proceedings, Workshop on Predicting and Improving Text
               Readability for Target Reader Populations},
  address   = {Montreal},
  year      = {2012},
  url       = {http://ftp.cs.toronto.edu/pub/gh/Brooke-etal-PITR-2012.pdf},
  download  = {http://ftp.cs.toronto.edu/pub/gh/Brooke-etal-PITR-2012.pdf},
  note      = {Poster: http://ftp.cs.toronto.edu/pub/gh/Brooke-etal-PITR-2012-poster.pdf},
  abstract  = {Lexicons of word difficulty are useful for various
               educational applications, including readability
               classification and text simplification. In this work, we
               explore automatic creation of these lexicons using methods
               which go beyond simple term frequency, but without relying
               on age-graded texts. In particular, we derive information
               for each word type from the readability of the web
               documents they appear in and the words they co-occur with,
               linearly combining these various features. We show the
               efficacy of this approach by comparing our lexicon with an
               existing coarse-grained, low-coverage resource and a new
               crowdsourced annotation.},
}
Downloads: 0
{"_id":{"_str":"53d57a48f414ae191e0003a7"},"__v":0,"authorIDs":[],"author_short":["Brooke, J.","Tsang, V.","Jacob, D.","Shein, F.","Hirst, G."],"bibbaseid":"brooke-tsang-jacob-shein-hirst-buildingreadabilitylexiconswithunannotatedcorpora-2012","bibdata":{"bibtype":"inproceedings","type":"inproceedings","author":[{"firstnames":["Julian"],"propositions":[],"lastnames":["Brooke"],"suffixes":[]},{"firstnames":["Vivian"],"propositions":[],"lastnames":["Tsang"],"suffixes":[]},{"firstnames":["David"],"propositions":[],"lastnames":["Jacob"],"suffixes":[]},{"firstnames":["Fraser"],"propositions":[],"lastnames":["Shein"],"suffixes":[]},{"firstnames":["Graeme"],"propositions":[],"lastnames":["Hirst"],"suffixes":[]}],"title":"Building readability lexicons with unannotated corpora","address":"Montreal","booktitle":"Proceedings, Workshop on Predicting and Improving Text Readability for Target Reader Populations","year":"2012","download":"http://ftp.cs.toronto.edu/pub/gh/Brooke-etal-PITR-2012.pdf","note":"<a href=http://ftp.cs.toronto.edu/pub/gh/Brooke-etal-PITR-2012-poster.pdf>The poster</a>","abstract":"Lexicons of word difficulty are useful for various educational applications, including readability classification and text simplification. In this work, we explore automatic creation of these lexicons using methods which go beyond simple term frequency, but without relying on age-graded texts. In particular, we derive information for each word type from the readability of the web documents they appear in and the words they co-occur with, linearly combining these various features. 
We show the efficacy of this approach by comparing our lexicon with an existing coarse-grained, low-coverage resource and a new crowdsourced annotation.","bibtex":"@InProceedings{\t brooke7,\n author\t= {Julian Brooke and Vivian Tsang and David Jacob and Fraser\n\t\t Shein and Graeme Hirst},\n title\t\t= {Building readability lexicons with unannotated corpora},\n address\t= {Montreal},\n booktitle\t= {Proceedings, Workshop on Predicting and Improving Text\n\t\t Readability for Target Reader Populations},\n year\t\t= {2012},\n download\t= {http://ftp.cs.toronto.edu/pub/gh/Brooke-etal-PITR-2012.pdf}\n\t\t ,\n note\t\t= {<a\n\t\t href=http://ftp.cs.toronto.edu/pub/gh/Brooke-etal-PITR-2012-poster.pdf>The\n\t\t poster</a>},\n abstract\t= {Lexicons of word difficulty are useful for various\n\t\t educational applications, including readability\n\t\t classification and text simplification. In this work, we\n\t\t explore automatic creation of these lexicons using methods\n\t\t which go beyond simple term frequency, but without relying\n\t\t on age-graded texts. In particular, we derive information\n\t\t for each word type from the readability of the web\n\t\t documents they appear in and the words they co-occur with,\n\t\t linearly combining these various features. 
We show the\n\t\t efficacy of this approach by comparing our lexicon with an\n\t\t existing coarse-grained, low-coverage resource and a new\n\t\t crowdsourced annotation.}\n}\n\n","author_short":["Brooke, J.","Tsang, V.","Jacob, D.","Shein, F.","Hirst, G."],"key":"brooke7","id":"brooke7","bibbaseid":"brooke-tsang-jacob-shein-hirst-buildingreadabilitylexiconswithunannotatedcorpora-2012","role":"author","urls":{},"metadata":{"authorlinks":{}}},"bibtype":"inproceedings","biburl":"www.cs.toronto.edu/~fritz/tmp/compling.bib","creationDate":"2014-07-27T22:16:40.636Z","downloads":0,"keywords":[],"search_terms":["building","readability","lexicons","unannotated","corpora","brooke","tsang","jacob","shein","hirst"],"title":"Building readability lexicons with unannotated corpora","year":2012,"dataSources":["n8jB5BJxaeSmH6mtR","6b6A9kbkw4CsEGnRX"]}