İnternet tabanlı bilgi erişimi destekli bir otomatik öğrenme sistemi. Uzun, E. Ph.D. Thesis, 2007.
İnternet tabanlı bilgi erişimi destekli bir otomatik öğrenme sistemi [link]Website  abstract   bibtex   1 download  
This thesis presents a web-based system that is intended to perform the task of automatic acquisition of subcategorization frames for Turkish. As a pro-drop, referentially sparse and free word order language, Turkish provides an interesting and challenging domain of application for natural language processing tasks. The thesis aims to contribute to the fields of information retrieval, natural language processing and machine learning in the following respects. Firstly, we offer a web-based approach to the automatic construction of corpora to be used in natural language processing and machine learning work. To this effect, we implemented a tool that collects grammatical Turkish sentences from the internet via search engines and annotates them with respect to case marking information. Secondly, various machine learning methods were applied to the generated corpus in order to acquire the subcategorization frames of a set of randomly chosen Turkish verbs. Thirdly, we divided our set of patterns into several subsets of different sizes to understand the effect of data size on the performance of the methods. Lastly, we offer a comparative evaluation of the methods used in our experiments, focusing particularly on the distinction between supervised and unsupervised methods. The thesis is organized as follows. The first chapter gives a brief account of the concepts of information retrieval, subcategorization frame and machine learning. Moreover, this chapter touches upon the relevant literature and the peculiarities of Turkish as a language to be investigated from a computational point of view. The second chapter introduces some machine learning algorithms and techniques used in our experiments. In the third chapter, we describe the view of the web as a corpus that is the largest data set available for natural language studies. In the fourth chapter, the design and implementation aspects of the proposed system are given. 
The fifth chapter reports on the results of our experiments and provides a comparative evaluation of the methods used in the experiments along with observations on the effect of data size on the performances. The thesis ends with a summary of major findings and conclusions in chapter six.
@comment{PhD thesis record for Uzun (2007). The entry key mirrors the
 citation_key field below. The school field duplicates institution and the
 url field duplicates websites, so that classic BibTeX styles and biblatex
 can both render the entry. Fields such as id, created, read and starred
 appear to be reference-manager metadata and are ignored by standard styles.}
@phdthesis{Uzun2010,
 author = {Uzun, Erdinç},
 title = {İnternet tabanlı bilgi erişimi destekli bir otomatik öğrenme sistemi},
 type = {phdthesis},
 school = {Trakya University},
 institution = {Trakya University},
 department = {Computer Engineering Department},
 year = {2007},
 url = {https://tez.yok.gov.tr/UlusalTezMerkezi/TezGoster?key=XohQ0H2mJnBfxLPsY8dG4yoIRmLa5H7omNhxdplRP1-k5w0pbxn25pSMbkEBjQ9F},
 websites = {https://tez.yok.gov.tr/UlusalTezMerkezi/TezGoster?key=XohQ0H2mJnBfxLPsY8dG4yoIRmLa5H7omNhxdplRP1-k5w0pbxn25pSMbkEBjQ9F},
 keywords = {Automatic acquisition of subcategorization frames,Machine learning methods,Web as a corpus},
 abstract = {This thesis presents a web-based system that is intended to perform the task of automatic acquisition of subcategorization frames for Turkish. As a pro-drop, a referentially sparse and free word order language, Turkish provides an interesting and challenging domain of application for natural language processing tasks. The thesis aims to contribute to the fields of information retrieval, natural language processing and machine learning in the following respects. Firstly, we offer a web-based approach to the automatic construction of corpora to be used in natural language processing and machine learning work. To this effect, we implemented a tool that collects grammatical Turkish sentences from internet via search engines and annotates them with respect to case marking information. Secondly, various machine learning methods were applied to the generated corpus in order to acquire the subcategorization frames of a set of randomly chosen Turkish verbs. Thirdly, we divided our set of patterns into several subsets of different sizes to understand effect of data size on the performance of methods. Lastly, we offer a comparative evaluation of the methods used in our experiments, focusing particularly on the distinction between supervised and unsupervised methods. The thesis is organized as follows. The first chapter gives a brief account of the concepts of information retrieval, subcategorization frame and machine learning. Moreover, this chapter touches upon the relevant literature and the peculiarities of a Turkish as a language to be investigated from a computational point of view. The second chapter introduces some machine learning algorithms and techniques used in our experiments. In the third chapter, we describe the view of web as a corpus that is the largest data set available for natural language studies. In the fourth chapter, the design and implementation aspects of the proposed system are given. 
The fifth chapter reports on the results of our experiments and provides a comparative evaluation of the methods used in the experiments along with observations on the effect of data size on the performances. The thesis ends with a summary of major findings and conclusions in chapter six.},
 id = {8ee3b073-ca0f-3ea6-973c-e2aff373df3c},
 created = {2018-06-05T12:53:52.311Z},
 file_attached = {false},
 profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},
 last_modified = {2018-07-04T12:59:46.907Z},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {true},
 hidden = {false},
 citation_key = {Uzun2010},
 private_publication = {false},
 bibtype = {phdthesis}
}

Downloads: 1