Case-sensitive letter and bigram frequency counts from large-scale English corpora.

Case-sensitive letter and bigram frequency counts from large-scale English corpora. Jones, M. N., & Mewhort, D. J. K. Behav Res Methods Instrum Comput, 36(3):388–396, 2004.
abstract bibtex

We tabulated upper- and lowercase letter frequency using several large-scale English corpora (approximately 183 million words in total). The results indicate that the relative frequencies for upper- and lowercase letters are not equivalent. We report a letter-naming experiment in which uppercase frequency predicted response time to uppercase letters better than did lowercase frequency. Tables of case-sensitive letter and bigram frequency are provided, including common nonalphabetic characters. Because subjects are sensitive to frequency relationships among letters, we recommend that experimenters use case-sensitive counts when constructing stimuli from letters.

% Journal article: case-sensitive letter and bigram frequency counts for English.
% NOTE(review): the trailing numeric keyword (15641428) looks like a PubMed ID;
% confirm before moving it to a dedicated identifier field.
@Article{Jones2004,
  author   = {Jones, Michael N. and Mewhort, D. J. K.},
  title    = {Case-sensitive letter and bigram frequency counts from large-scale {English} corpora},
  journal  = {Behavior Research Methods, Instruments, \& Computers},
  year     = {2004},
  volume   = {36},
  number   = {3},
  pages    = {388--396},
  abstract = {We tabulated upper- and lowercase letter frequency using several large-scale
              English corpora (approximately 183 million words in total). The results
              indicate that the relative frequencies for upper- and lowercase letters
              are not equivalent. We report a letter-naming experiment in which
              uppercase frequency predicted response time to uppercase letters
              better than did lowercase frequency. Tables of case-sensitive letter
              and bigram frequency are provided, including common nonalphabetic
              characters. Because subjects are sensitive to frequency relationships
              among letters, we recommend that experimenters use case-sensitive
              counts when constructing stimuli from letters.},
  keywords = {Cues, Fixation, Humans, Linguistics, Ocular, Periodicity, Visual Perception, Vocabulary, 15641428},
}

Downloads: 0

{"_id":"9yqHQnGEqFRczFmNc","bibbaseid":"jones-mewhort-casesensitiveletterandbigramfrequencycountsfromlargescaleenglishcorpora-2004","authorIDs":[],"author_short":["Jones, M. N","Mewhort, D. J K"],"bibdata":{"bibtype":"article","type":"article","author":[{"firstnames":["Michael","N"],"propositions":[],"lastnames":["Jones"],"suffixes":[]},{"firstnames":["D.","J","K"],"propositions":[],"lastnames":["Mewhort"],"suffixes":[]}],"journal":"Behav Res Methods Instrum Comput","title":"Case-sensitive letter and bigram frequency counts from large-scale English corpora.","year":"2004","number":"3","pages":"388-96","volume":"36","abstract":"We tabulated upper- and lowercase letter frequency using several large-scale English corpora (approximately 183 million words in total). The results indicate that the relative frequencies for upper- and lowercase letters are not equivalent. We report a letter-naming experiment in which uppercase frequency predicted response time to uppercase letters better than did lowercase frequency. Tables of case-sensitive letter and bigram frequency are provided, including common nonalphabetic characters. Because subjects are sensitive to frequency relationships among letters, we recommend that experimenters use case-sensitive counts when constructing stimuli from letters.","keywords":"Cues, Fixation, Humans, Linguistics, Ocular, Periodicity, Visual Perception, Vocabulary, 15641428","bibtex":"@Article{Jones2004,\n author = {Michael N Jones and D. J K Mewhort},\n journal = {Behav Res Methods Instrum Comput},\n title = {Case-sensitive letter and bigram frequency counts from large-scale {E}nglish corpora.},\n year = {2004},\n number = {3},\n pages = {388-96},\n volume = {36},\n abstract = {We tabulated upper- and lowercase letter frequency using several large-scale\n\tEnglish corpora (approximately 183 million words in total). The results\n\tindicate that the relative frequencies for upper- and lowercase letters\n\tare not equivalent. 
We report a letter-naming experiment in which\n\tuppercase frequency predicted response time to uppercase letters\n\tbetter than did lowercase frequency. Tables of case-sensitive letter\n\tand bigram frequency are provided, including common nonalphabetic\n\tcharacters. Because subjects are sensitive to frequency relationships\n\tamong letters, we recommend that experimenters use case-sensitive\n\tcounts when constructing stimuli from letters.},\n keywords = {Cues, Fixation, Humans, Linguistics, Ocular, Periodicity, Visual Perception, Vocabulary, 15641428},\n}\n\n","author_short":["Jones, M. N","Mewhort, D. J K"],"key":"Jones2004","id":"Jones2004","bibbaseid":"jones-mewhort-casesensitiveletterandbigramfrequencycountsfromlargescaleenglishcorpora-2004","role":"author","urls":{},"keyword":["Cues","Fixation","Humans","Linguistics","Ocular","Periodicity","Visual Perception","Vocabulary","15641428"],"metadata":{"authorlinks":{}},"downloads":0},"bibtype":"article","biburl":"https://endress.org/publications/ansgar.bib","creationDate":"2020-07-03T22:46:22.339Z","downloads":0,"keywords":["cues","fixation","humans","linguistics","ocular","periodicity","visual perception","vocabulary","15641428"],"search_terms":["case","sensitive","letter","bigram","frequency","counts","large","scale","english","corpora","jones","mewhort"],"title":"Case-sensitive letter and bigram frequency counts from large-scale English corpora.","year":2004,"dataSources":["5S2zj2hKW8TWTkuMq","xPGxHAeh3vZpx4yyE","TXa55dQbNoWnaGmMq"]}