Text classification of web based news articles by using Turkish grammatical features. Tüfekci, P., Uzun, E., & Sevinç, B. In 2012 20th Signal Processing and Communications Applications Conference (SIU), pages 1-4, 4, 2012. IEEE.
Text classification of web based news articles by using Turkish grammatical features [link]Website  doi  abstract   bibtex   
The dimensions of the feature vectors being used at the classification methods in the literature affect directly the time performance. In this study, how to reduce the dimension of the feature vector by using Turkish's grammar rules without compromising success rates is explained. The feature vector is weighted on the basis of the word frequency as the word stems have been selected as features. During this selection the effects of selection of the word stems with different length and type to the classification are investigated and when the word stems with noun type and the maximum length are selected as features, the success rate has been found to be at the highest level. When this selection is applied with the other methods which reduce the dimension, the dimension of the feature vector is decreased to 97.46%. Using the reduced feature vector the better success rates generally have been obtained from Naive Bayes, SVM, C 4.5 and RF classification methods and the best performance achieved is 92.73% which has been obtained using the Naive Bayes method.
@inproceedings{
 title = {Text classification of web based news articles by using Turkish grammatical features},
 type = {inproceedings},
 year = {2012},
 keywords = {NLP,Text classification},
 pages = {1-4},
 websites = {http://ieeexplore.ieee.org/document/6204565/},
 month = {4},
 publisher = {IEEE},
 id = {5ff144c4-6c81-3ac9-a60f-93c82d2fb528},
 created = {2018-03-16T13:30:30.813Z},
 file_attached = {false},
 profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},
 last_modified = {2018-07-04T12:59:46.931Z},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {true},
 hidden = {false},
 citation_key = {Tufekci2012},
 private_publication = {false},
 abstract = {The dimensions of the feature vectors being used at the classification methods in the literature affect directly the time performance. In this study, how to reduce the dimension of the feature vector by using Turkish's grammar rules without compromising success rates is explained. The feature vector is weighted on the basis of the word frequency as the word stems have been selected as features. During this selection the effects of selection of the word stems with different length and type to the classification are investigated and when the word stems with noun type and the maximum length are selected as features, the success rate has been found to be at the highest level. When this selection is applied with the other methods which reduce the dimension, the dimension of the feature vector is decreased to 97.46%. Using the reduced feature vector the better succes rates generally have been obtained from Naive Bayes, SVM, C 4.5 and RF classification methods and the best performance achieved is 92.73% which has been obtained using the Naive Bayes method.},
 bibtype = {inproceedings},
 author = {Tüfekci, Pinar and Uzun, Erdinç and Sevinç, Burak},
 doi = {10.1109/SIU.2012.6204565},
 booktitle = {2012 20th Signal Processing and Communications Applications Conference (SIU)}
}

Downloads: 0