Towards combining rule-based and statistical part of speech tagging in agglutinative languages

Towards combining rule-based and statistical part of speech tagging in agglutinative languages. Altunyurt, L., Orhan, Z., & Güngör, T. Technical Report 2007.

Paper abstract bibtex

We present a composite part of speech tagger for Turkish which combines the rule-based and statistical approaches. The tagger makes use of word frequencies and n-gram statistics from a corpus. We use the output of a morphological analyzer in order to get more accurate results and also to eliminate the sparse data problem. In addition, we employ a heuristic about the position of words in the sentences. Although the experiments have been performed on a very small corpus, the results have shown that the use of a composite approach and heuristics improves the accuracy of the tagger. Keywords: agglutinative language, part of speech tagger, rule-based and statistical method

% Technical report by Altunyurt, Orhan and Güngör (2007) on a composite
% rule-based/statistical part of speech tagger for Turkish.
% A citation key is supplied because an entry without one cannot be parsed or cited.
% Fields such as id, created, accessed, profile_id, group_id, read, starred, etc.
% are not standard BibTeX fields; standard styles ignore unknown fields.
% NOTE(review): techreport entries require an `institution` field, which this
% record does not provide -- add it once the issuing institution is confirmed.
@techreport{altunyurt2007towards,
 title = {Towards combining rule-based and statistical part of speech tagging in agglutinative languages},
 type = {Technical Report},
 year = {2007},
 source = {Computer engineering},
 volume = {1},
 issue = {1},
 id = {4ef9a84d-1641-3d3f-92cb-12d791197a23},
 created = {2019-10-12T11:01:01.071Z},
 accessed = {2019-10-12},
 file_attached = {true},
 profile_id = {1971c810-6732-3a00-9f6b-d217e1a53071},
 group_id = {cbcfbfec-195f-3b99-b6a1-d26e1dd80ff5},
 last_modified = {2019-10-12T11:01:01.147Z},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {false},
 hidden = {false},
 private_publication = {false},
 abstract = {We present a composite part of speech tagger for Turkish which combines the rule-based and statistical approaches. The tagger makes use of word frequencies and n-gram statistics from a corpus. We use the output of a morphological analyzer in order to get more accurate results and also to eliminate the sparse data problem. In addition, we employ a heuristics about the position of words in the sentences. Although the experiments have been performed on a very small corpus, the results have shown that the use of a composite approach and heuristics improves the accuracy of the tagger.},
 keywords = {agglutinative language, part of speech tagger, rule-based and statistical method},
 bibtype = {techreport},
 author = {Altunyurt, Levent and Orhan, Zihni and Güngör, Tunga}
}

Downloads: 0

{"_id":"JxPrhyByx7o6zyFaT","bibbaseid":"altunyurt-orhan-gngr-towardscombiningrulebasedandstatisticalpartofspeechtagginginagglutinativelanguages-2007","authorIDs":[],"author_short":["Altunyurt, L.","Orhan, Z.","Güngör, T."],"bibdata":{"title":"Towards combining rule-based and statistical part of speech tagging in agglutinative languages","type":"techreport","year":"2007","source":"Computer engineering","volume":"1","issue":"1","id":"4ef9a84d-1641-3d3f-92cb-12d791197a23","created":"2019-10-12T11:01:01.071Z","accessed":"2019-10-12","file_attached":"true","profile_id":"1971c810-6732-3a00-9f6b-d217e1a53071","group_id":"cbcfbfec-195f-3b99-b6a1-d26e1dd80ff5","last_modified":"2019-10-12T11:01:01.147Z","read":false,"starred":false,"authored":false,"confirmed":false,"hidden":false,"private_publication":false,"abstract":"We present a composite part of speech tagger for Turkish which combines the rule-based and statistical approaches. The tagger makes use of word frequencies and n-gram statistics from a corpus. We use the output of a morphological analyzer in order to get more accurate results and also to eliminate the sparse data problem. In addition, we employ a heuristics about the position of words in the sentences. Although the experiments have been performed on a very small corpus, the results have shown that the use of a composite approach and heuristics improves the accuracy of the tagger. 
Keywords: agglutinative language, part of speech tagger, rule-based and statistical method","bibtype":"techreport","author":"Altunyurt, Levent and Orhan, Zihni and Güngör, Tunga","bibtex":"@techreport{\n title = {Towards combining rule-based and statistical part of speech tagging in agglutinative languages},\n type = {techreport},\n year = {2007},\n source = {Computer engineering},\n volume = {1},\n issue = {1},\n id = {4ef9a84d-1641-3d3f-92cb-12d791197a23},\n created = {2019-10-12T11:01:01.071Z},\n accessed = {2019-10-12},\n file_attached = {true},\n profile_id = {1971c810-6732-3a00-9f6b-d217e1a53071},\n group_id = {cbcfbfec-195f-3b99-b6a1-d26e1dd80ff5},\n last_modified = {2019-10-12T11:01:01.147Z},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {false},\n hidden = {false},\n private_publication = {false},\n abstract = {We present a composite part of speech tagger for Turkish which combines the rule-based and statistical approaches. The tagger makes use of word frequencies and n-gram statistics from a corpus. We use the output of a morphological analyzer in order to get more accurate results and also to eliminate the sparse data problem. In addition, we employ a heuristics about the position of words in the sentences. Although the experiments have been performed on a very small corpus, the results have shown that the use of a composite approach and heuristics improves the accuracy of the tagger. 
Keywords: agglutinative language, part of speech tagger, rule-based and statistical method},\n bibtype = {techreport},\n author = {Altunyurt, Levent and Orhan, Zihni and Güngör, Tunga}\n}","author_short":["Altunyurt, L.","Orhan, Z.","Güngör, T."],"urls":{"Paper":"https://bibbase.org/service/mendeley/1971c810-6732-3a00-9f6b-d217e1a53071/file/4bceaa63-3e3b-446f-6111-9ff6c0b5ec31/full_text.pdf.pdf"},"bibbaseid":"altunyurt-orhan-gngr-towardscombiningrulebasedandstatisticalpartofspeechtagginginagglutinativelanguages-2007","role":"author","downloads":0},"bibtype":"techreport","creationDate":"2019-10-25T20:27:35.167Z","downloads":0,"keywords":[],"search_terms":["towards","combining","rule","based","statistical","part","speech","tagging","agglutinative","languages","altunyurt","orhan","güngör"],"title":"Towards combining rule-based and statistical part of speech tagging in agglutinative languages","year":2007}