Turkish Datasets for Text Genre Identification

Turkish Datasets for Text Genre Identification. Tüfekci, P., Uzun, E., & Bektaş, M. In 2nd International Conference on Data Science and Applications (ICONDATA’19), pages 72-75, 2019.

Website abstract bibtex 2 downloads

In this study, 7 large data sets were created for use in modeling studies that address genre identification, a text classification problem, for the Turkish language. These data sets consist of all the past columns written by the authors of a newspaper up to 24.04.2019. The data sets, which were created for the purpose of determining the text genre of the authors' columns, contain 8 classes and 24,344 texts; 7 classes and 22,442 texts; 6 classes and 20,160 texts; 5 classes and 19,325 texts; 4 classes and 16,328 texts; 3 classes and 14,334 texts; and 2 classes and 11,698 texts. Naive Bayes and Random Forest, which are classical machine learning classification algorithms, were applied to these data sets. The most successful result was obtained from the Random Forest classifier, with 95.45% accuracy on the data set consisting of 2 classes, with 5,849 texts in each class and a total of 11,698 texts.

@comment{Conference paper entry. The citation key is taken from the entry's own
citation_key field. The standard url field mirrors the non-standard websites
field so that url-aware styles can print the link. The remaining non-standard
fields (id, created, profile_id, read, starred, ...) are kept as exported and
are ignored by standard styles.}
@inproceedings{Tufekci2019,
 title = {{Turkish} Datasets for Text Genre Identification},
 type = {inproceedings},
 year = {2019},
 pages = {72--75},
 url = {https://www.researchgate.net/publication/338232364_Metin_Turu_Belirleme_icin_Turkce_Veri_Setleri_Turkish_Datasets_for_Text_Genre_Identification},
 websites = {https://www.researchgate.net/publication/338232364_Metin_Turu_Belirleme_icin_Turkce_Veri_Setleri_Turkish_Datasets_for_Text_Genre_Identification},
 city = {Balıkesir, TURKEY},
 id = {2e52d268-8e8d-3b4b-bb7c-94df05ab59bf},
 created = {2020-10-01T22:18:11.898Z},
 file_attached = {false},
 profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},
 last_modified = {2020-10-23T11:10:59.169Z},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {true},
 hidden = {false},
 citation_key = {Tufekci2019},
 private_publication = {false},
 abstract = {In this study, 7 big data sets have been created to be used in modeling studies which can be a solution to the genre identification problem from text classification problems for Turkish language, and these data sets consist of all the past column writings of the authors of a newspaper until 24.04.2019. These data sets, which were created for the purpose of determining the type of text of the authors, columns, 8 classes and 24,344 texts, 7 classes and 22,442 texts; 6 classes and 20,160 texts; 5 classes and 19,325 texts; 4 classes and 16,328 texts; includes 3 classes and 14,334 texts and 2 classes and 11,698 texts. These datasets were applied to Naive Bayes and Random Forest classification algorithms which are the classical machine learning algorithms. The most successful result was obtained from the Random Forest classifier with 95.45\% accuracy with a data set consisting of 2 classes, 5,849 texts in each class and a total of 11,698 texts.},
 bibtype = {inproceedings},
 author = {Tüfekci, Pınar and Uzun, Erdinç and Bektaş, Melike},
 booktitle = {2nd International Conference on Data Science and Applications (ICONDATA’19)},
 keywords = {Genre Identification,Text Classification,Turkish Dataset}
}

Downloads: 2

{"_id":"ayJy8qGSkrmQnM5WT","bibbaseid":"tfekci-uzun-bekta-turkishdatasetsfortextgenreidentification-2019","authorIDs":["QrE2Jk7Eehmqc5trT"],"author_short":["Tüfekci, P.","Uzun, E.","Bektaş, M."],"bibdata":{"title":"Turkish Datasets for Text Genre Identification","type":"inproceedings","year":"2019","pages":"72-75","websites":"https://www.researchgate.net/publication/338232364_Metin_Turu_Belirleme_icin_Turkce_Veri_Setleri_Turkish_Datasets_for_Text_Genre_Identification","city":"Balıkesir, TURKEY","id":"2e52d268-8e8d-3b4b-bb7c-94df05ab59bf","created":"2020-10-01T22:18:11.898Z","file_attached":false,"profile_id":"37fa15c3-e5d0-3212-8e18-e4c72814fd47","last_modified":"2020-10-23T11:10:59.169Z","read":false,"starred":false,"authored":"true","confirmed":"true","hidden":false,"citation_key":"Tufekci2019","private_publication":false,"abstract":"In this study, 7 big data sets have been created to be used in modeling studies which can be a solution to the genre identification problem from text classification problems for Turkish language, and these data sets consist of all the past column writings of the authors of a newspaper until 24.04.2019. These data sets, which were created for the purpose of determining the type of text of the authors, columns, 8 classes and 24,344 texts, 7 classes and 22,442 texts; 6 classes and 20,160 texts; 5 classes and 19,325 texts; 4 classes and 16,328 texts; includes 3 classes and 14,334 texts and 2 classes and 11,698 texts. These datasets were applied to Naive Bayes and Random Forest classification algorithms which are the classical machine learning algorithms. 
The most successful result was obtained from the Random Forest classifier with 95.45% accuracy with a data set consisting of 2 classes, 5,849 texts in each class and a total of 11,698 texts.","bibtype":"inproceedings","author":"Tüfekci, Pınar and Uzun, Erdinç and Bektaş, Melike","booktitle":"2nd International Conference on Data Science and Applications (ICONDATA’19)","keywords":"Genre Identification,Text Classification,Turkish Dataset","bibtex":"@inproceedings{\n title = {Turkish Datasets for Text Genre Identification},\n type = {inproceedings},\n year = {2019},\n pages = {72-75},\n websites = {https://www.researchgate.net/publication/338232364_Metin_Turu_Belirleme_icin_Turkce_Veri_Setleri_Turkish_Datasets_for_Text_Genre_Identification},\n city = {Balıkesir, TURKEY},\n id = {2e52d268-8e8d-3b4b-bb7c-94df05ab59bf},\n created = {2020-10-01T22:18:11.898Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-10-23T11:10:59.169Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Tufekci2019},\n private_publication = {false},\n abstract = {In this study, 7 big data sets have been created to be used in modeling studies which can be a solution to the genre identification problem from text classification problems for Turkish language, and these data sets consist of all the past column writings of the authors of a newspaper until 24.04.2019. These data sets, which were created for the purpose of determining the type of text of the authors, columns, 8 classes and 24,344 texts, 7 classes and 22,442 texts; 6 classes and 20,160 texts; 5 classes and 19,325 texts; 4 classes and 16,328 texts; includes 3 classes and 14,334 texts and 2 classes and 11,698 texts. These datasets were applied to Naive Bayes and Random Forest classification algorithms which are the classical machine learning algorithms. 
The most successful result was obtained from the Random Forest classifier with 95.45% accuracy with a data set consisting of 2 classes, 5,849 texts in each class and a total of 11,698 texts.},\n bibtype = {inproceedings},\n author = {Tüfekci, Pınar and Uzun, Erdinç and Bektaş, Melike},\n booktitle = {2nd International Conference on Data Science and Applications (ICONDATA’19)},\n keywords = {Genre Identification,Text Classification,Turkish Dataset}\n}","author_short":["Tüfekci, P.","Uzun, E.","Bektaş, M."],"urls":{"Website":"https://www.researchgate.net/publication/338232364_Metin_Turu_Belirleme_icin_Turkce_Veri_Setleri_Turkish_Datasets_for_Text_Genre_Identification"},"biburl":"https://bibbase.org/service/mendeley/37fa15c3-e5d0-3212-8e18-e4c72814fd47","bibbaseid":"tfekci-uzun-bekta-turkishdatasetsfortextgenreidentification-2019","role":"author","keyword":["Genre Identification","Text Classification","Turkish Dataset"],"metadata":{"authorlinks":{"uzun, e":"https://erdincuzun.com/yayinlar/"}},"downloads":2},"bibtype":"inproceedings","creationDate":"2020-10-02T07:43:05.648Z","downloads":2,"keywords":["genre identification","text classification","turkish dataset"],"search_terms":["turkish","datasets","text","genre","identification","tüfekci","uzun","bektaş"],"title":"Turkish Datasets for Text Genre Identification","year":2019,"biburl":"https://bibbase.org/service/mendeley/37fa15c3-e5d0-3212-8e18-e4c72814fd47","dataSources":["mqdHLrE2gnaRYnL6B","ya2CyA73rpZseyrZ8","2252seNhipfTmjEBQ"]}