2024 (1)

Scraping Relevant Images from Web Pages without Download. Uzun, E. ACM Transactions on the Web, 18(1): 1-27. February 2024.

@article{Uzun2024,
  title   = {Scraping Relevant Images from Web Pages without Download},
  author  = {Uzun, Erdinç},
  journal = {ACM Transactions on the Web},
  year    = {2024},
  month   = {2},
  volume  = {18},
  number  = {1},
  pages   = {1-27},
  doi     = {10.1145/3616849},
  url     = {https://dl.acm.org/doi/10.1145/3616849}
}

Automatically scraping relevant images from web pages is an error-prone and time-consuming task, leading experts to prefer manually preparing extraction patterns for a website. Existing web scraping tools are built on these patterns. However, this manual approach is laborious and requires specialized knowledge. Automatic extraction approaches, while a potential solution, require large training datasets and numerous features, including width, height, pixels, and file size, that can be difficult and time-consuming to obtain. To address these challenges, we propose a semi-automatic approach that does not require an expert, utilizes small training datasets, and has a low error rate while saving time and storage. Our approach involves clustering web pages from a website and suggesting several pages for a non-expert to annotate relevant images. The approach then uses these annotations to construct a learning model based on textual data from the HTML elements. In the experiments, we used a dataset of 635,015 images from 200 news websites, each containing 100 pages, with 22,632 relevant images. When comparing several machine learning methods for both automatic approaches and our proposed approach, the AdaBoost method yields the best performance results. When using automatic extraction approaches, the best f-Measure that can be achieved is 0.805 with a learning model constructed from a large training dataset consisting of 120 websites (12,000 web pages). In contrast, our approach achieved an average f-Measure of 0.958 for 200 websites with only six web pages annotated per website. This means that a non-expert only needs to examine 1,200 web pages to determine the relevant images for 200 websites. Our approach also saves time and storage space by not requiring the download of images and can be easily integrated into currently available web scraping tools, because it is based on textual data.
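
As a rough, non-authoritative sketch of the kind of pipeline the abstract describes, the snippet below derives purely textual features from <img> elements and trains an AdaBoost classifier on a handful of annotated pages, so no image file is ever downloaded. The feature set, helper names, and toy annotations are assumptions made for illustration, not the paper's actual implementation.

```python
# Minimal sketch (not the paper's code): classify <img> elements as relevant or not
# using only textual attributes of the HTML, so no image needs to be downloaded.
from bs4 import BeautifulSoup
from sklearn.ensemble import AdaBoostClassifier
from sklearn.feature_extraction import DictVectorizer

def img_features(tag):
    """Textual features of an <img> element; the exact set is an assumption."""
    src = tag.get("src", "")
    return {
        "src_len": len(src),
        "has_alt": int(bool(tag.get("alt"))),
        "in_article": int(tag.find_parent("article") is not None),
        "class": " ".join(tag.get("class", [])),
        "parent": tag.parent.name if tag.parent else "",
    }

def extract_examples(html, relevant_srcs):
    """relevant_srcs: srcs a non-expert marked as relevant on this page."""
    soup = BeautifulSoup(html, "html.parser")
    feats, labels = [], []
    for img in soup.find_all("img"):
        feats.append(img_features(img))
        labels.append(int(img.get("src", "") in relevant_srcs))
    return feats, labels

# Tiny placeholder annotation: one relevant photo, one irrelevant logo.
annotated = [(
    "<article><img src='/imgs/story1.jpg' alt='photo'></article>"
    "<footer><img src='/imgs/logo.png'></footer>",
    {"/imgs/story1.jpg"},
)]
X, y = [], []
for html, marked in annotated:
    f, l = extract_examples(html, marked)
    X += f
    y += l

vec = DictVectorizer()
clf = AdaBoostClassifier(n_estimators=100).fit(vec.fit_transform(X), y)

def relevant_images(html):
    """Predict the relevant image srcs of an unseen page of the same site."""
    feats, _ = extract_examples(html, set())
    imgs = BeautifulSoup(html, "html.parser").find_all("img")
    preds = clf.predict(vec.transform(feats))
    return [img.get("src") for img, p in zip(imgs, preds) if p == 1]

print(relevant_images(annotated[0][0]))
```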


2023 (1)

An efficient regular expression inference approach for relevant image extraction. Agun, H. V.; and Uzun, E. Applied Soft Computing, 135: 110030. March 2023.

@article{Agun2023,
  title   = {An efficient regular expression inference approach for relevant image extraction},
  author  = {Agun, Hayri Volkan and Uzun, Erdinç},
  journal = {Applied Soft Computing},
  year    = {2023},
  month   = {3},
  volume  = {135},
  pages   = {110030},
  doi     = {10.1016/j.asoc.2023.110030},
  url     = {https://linkinghub.elsevier.com/retrieve/pii/S1568494623000480}
}

Traditional approaches for extracting relevant images automatically from web pages are error-prone and time-consuming. To improve this task, operations such as preparing a larger dataset and finding new features are used in web data extraction approaches. However, these operations are difficult and laborious. In this study, we propose a fully automated approach based on the alignment of regular expressions to automatically extract the relevant images from web pages. The automatically constructed regular expressions have been applied to a classification task for the first time. In this respect, a multi-stage inference approach is developed for generating regular expressions from the attribute values of relevant and irrelevant image elements in web pages. The proposed approach reduces the complexity of the alignment of two regular expressions by applying a constraint on a version of the Levenshtein distance algorithm. The classification accuracy of regular expression approaches is compared with the naive Bayes, logistic regression, J48, and multilayer perceptron classifiers on a balanced relevant image retrieval dataset consisting of 360 image element samples for 10 shopping websites. According to the cross-validation results, the regular expression inference-based classification achieved a 0.98 f-measure with only 5 frequent n-grams, and it outperformed other classifiers on the same set of features. The classification efficiency of the proposed approach is measured at 0.108 ms, which is very competitive with other classifiers.
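
The snippet below is only a toy stand-in for the idea of inferring one pattern from several attribute strings: it aligns two attribute strings of relevant <img> elements with difflib and generalizes the differing parts into ".*?". The authors' multi-stage inference with a constrained Levenshtein alignment is considerably more sophisticated; everything here is illustrative.

```python
# Toy illustration (not the authors' algorithm): derive a single regular expression
# that matches two attribute strings by keeping their common blocks and replacing
# the differing parts with ".*?".
import re
from difflib import SequenceMatcher

def generalize(a, b):
    """Align two strings and emit one regex that matches both (toy version)."""
    sm = SequenceMatcher(None, a, b)
    pattern, a_end, b_end = "", 0, 0
    for i, j, size in sm.get_matching_blocks():
        if i > a_end or j > b_end:        # a differing region in either string
            pattern += ".*?"
        pattern += re.escape(a[i:i + size])
        a_end, b_end = i + size, j + size
    return pattern

# Attribute strings of two relevant <img> elements from different pages.
relevant = ['class="article-photo" width="640"', 'class="article-photo" width="800"']
rx = re.compile(generalize(*relevant))

candidates = [
    'class="article-photo" width="720"',   # should match -> relevant
    'class="footer-logo" width="120"',     # should not match -> irrelevant
]
for attrs in candidates:
    print(attrs, "->", "relevant" if rx.fullmatch(attrs) else "irrelevant")
```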

2021 (1)

The Analysis of Big Financial Data Through Artificial Intelligence Methods. Ozhan, E.; and Uzun, E. In Sezer Bozkuş Kahyaoğlu, editor, The Impact of Artificial Intelligence on Governance, Economics and Finance, pages 51-79. Springer, 2021.

@inbook{Ozhan2021,
  chapter   = {The Analysis of Big Financial Data Through Artificial Intelligence Methods},
  title     = {The Impact of Artificial Intelligence on Governance, Economics and Finance},
  author    = {Ozhan, Erkan and Uzun, Erdinç},
  editor    = {Sezer Bozkuş Kahyaoğlu},
  publisher = {Springer},
  year      = {2021},
  pages     = {51-79},
  doi       = {10.1007/978-981-33-6811-8_4},
  url       = {https://link.springer.com/10.1007/978-981-33-6811-8_4},
  keywords  = {Artificial intelligence, Association rules, Big data, Classification, Clustering, Financial data}
}

With the evolution of technology, a new world of data has emerged that does not degrade, can be reached from anywhere, and streams and multiplies continuously. The data created by business firms, scientific research centers, and automation systems in particular have reached enormous volumes. Extracting meaningful, unexplored, and valuable information or conclusions from these piles of data has become the main goal of many data analysts. In this chapter, the techniques of artificial intelligence and their capabilities are first discussed. Then, the most widely used techniques in the finance sector, their advantages and weaknesses, and the methods that can be used to process the data produced by the finance sector, one of the leading sources of big data, are presented comparatively. The current state of the most widely used artificial intelligence methods in the finance sector is surveyed, and the new capabilities and contributions they provide to the sector are examined. Classification, clustering, association rules, and time series analysis methods in particular are examined in terms of what they cover and which problems they can solve, and the reader is informed about these techniques. Sample studies illustrate credit scoring and customer segmentation, where classification and clustering methods are especially employed. The aim is to present the principles underlying current methods and their theoretical and practical applications in a meaningful way. In addition, practical and useful software that can be used for data analysis in the finance sector is introduced and its capabilities are described. Finally, since financial data qualify as big data, how big data processing techniques can be used is examined through examples. The difficulties encountered in the analysis of big data, a natural result of this sector, and solutions to them are presented. Up-to-date big data processing solutions, in particular Hadoop, Spark, MapReduce, distributed computing, and GPU (Graphics Processing Unit) computing, are explained comparatively. The main principles behind big data processing techniques are simplified so that readers can understand them and are supported by examples from the sector; Spark, Hadoop, and MapReduce, the leading methods for processing big data, are examined in particular. Finally, the contributions of artificial intelligence and big data processing techniques to the sector are summarized and the results are presented.

2020 (4)

A Novel Web Scraping Approach Using the Additional Information Obtained from Web Pages. Uzun, E. IEEE Access, 8: 61726-61740. 2020.

@article{Uzun2020b,
  title    = {A Novel Web Scraping Approach Using the Additional Information Obtained from Web Pages},
  author   = {Uzun, Erdinc},
  journal  = {IEEE Access},
  year     = {2020},
  volume   = {8},
  pages    = {61726-61740},
  doi      = {10.1109/ACCESS.2020.2984503},
  url      = {https://ieeexplore.ieee.org/document/9051800/},
  keywords = {Computational efficiency, algorithm design and analysis, document object model, web crawling and scraping}
}

Web scraping is a process of extracting valuable and interesting text information from web pages. Most current studies targeting this task are about automated web data extraction. In the extraction process, these studies first create a DOM tree and then access the necessary data through this tree. The construction of this tree increases the time cost depending on the data structure of the DOM tree. In the current web scraping literature, it is observed that time efficiency is ignored. This study proposes a novel approach, namely UzunExt, which extracts content quickly using string methods and additional information without creating a DOM tree. The string methods consist of the following consecutive steps: searching for a given pattern, then calculating the number of closing HTML elements for this pattern, and finally extracting content for the pattern. In the crawling process, our approach collects additional information, including the starting position for enhancing the searching process, the number of inner tags for improving the extraction process, and tag repetition for terminating the extraction process. The string methods of this novel approach are about 60 times faster than extracting with the DOM-based method. Moreover, using this additional information improves extraction time by 2.35 times compared to using only the string methods. Furthermore, this approach can easily be adapted to other DOM-based studies/parsers in this task to enhance their time efficiency.
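
A stripped-down sketch of the string-based idea (find the opening pattern with a plain string search, then scan forward counting same-named tags until the matching close tag): the function name and details below are mine, not the published UzunExt implementation, and the additional crawl-time information the paper exploits is omitted.

```python
# Simplified sketch of string-based extraction in the spirit of UzunExt
# (not the published implementation): find an opening tag by plain string
# search, then walk forward counting nested tags of the same name until
# the matching close tag is reached. No DOM tree is built.
def extract_block(html, open_pattern, tag="div"):
    start = html.find(open_pattern)              # e.g. '<div class="article-body"'
    if start == -1:
        return None
    open_tok, close_tok = "<" + tag, "</" + tag + ">"
    depth, pos = 0, start
    while True:
        next_open = html.find(open_tok, pos + 1)
        next_close = html.find(close_tok, pos + 1)
        if next_close == -1:
            return None                          # malformed document
        if next_open != -1 and next_open < next_close:
            depth += 1                           # nested element of the same tag
            pos = next_open
        else:
            if depth == 0:
                return html[start:next_close + len(close_tok)]
            depth -= 1
            pos = next_close

page = '<div class="menu">x</div><div class="article-body"><div>text</div></div>'
print(extract_block(page, '<div class="article-body"'))
```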

A regular expression generator based on CSS selectors for efficient extraction from HTML pages. Uzun, E. Turkish Journal of Electrical Engineering & Computer Sciences, 28: 3389-3401. 2020.

@article{Uzun2020a,
  title     = {A regular expression generator based on CSS selectors for efficient extraction from HTML pages},
  author    = {Uzun, Erdinç},
  journal   = {Turkish Journal of Electrical Engineering & Computer Sciences},
  year      = {2020},
  volume    = {28},
  pages     = {3389-3401},
  publisher = {The Scientific and Technological Research Council of Turkey},
  doi       = {10.3906/elk-2004-67},
  url       = {https://journals.tubitak.gov.tr/elektrik/issues/elk-20-28-6/elk-28-6-20-2004-67.pdf},
  keywords  = {Web data extraction, computational efficiency, heuristic algorithms, regular expressions}
}

Cascading style sheets (CSS) selectors are patterns used to select HTML elements. They are often preferred in web data extraction because they are easy to prepare and have short expressions. In order to extract data from web pages using these patterns, a document object model (DOM) tree is constructed by an HTML parser for a web page. The construction of this tree and the extraction process using it increase time and memory costs depending on the number of HTML elements and their hierarchies. For reducing these costs, regular expressions can be considered as a solution. However, preparing regular expression patterns is a laborious task. In this study, a heuristic approach, namely Regex Generator (REGEXN), that automatically generates these patterns from CSS selectors is introduced, and the performance gains are analyzed on a web crawler. The analysis shows that regular expression patterns generated by this approach can significantly reduce the average extraction time, from 743.31 ms to 1.03 ms, when compared with the extraction process from a DOM tree. Similarly, the average memory usage drops from 1054.01 B to 1.59 B. Moreover, REGEXN can be easily adapted to the existing frameworks and tools in this task.
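
As a hedged illustration of the general idea (not REGEXN itself), the toy converter below turns two very simple selector shapes, tag.class and tag#id, into regular expressions that capture the element's content so extraction can run on the raw HTML string.

```python
# Toy illustration (not REGEXN itself): turn a couple of very simple CSS
# selector shapes into regular expressions that capture the element's content,
# so extraction can run on the raw HTML string instead of a DOM tree.
import re

def selector_to_regex(selector):
    m = re.fullmatch(r"(\w+)\.([\w-]+)", selector)       # e.g. div.article
    if m:
        tag, cls = m.groups()
        return rf'<{tag}[^>]*class="[^"]*\b{cls}\b[^"]*"[^>]*>(.*?)</{tag}>'
    m = re.fullmatch(r"(\w+)#([\w-]+)", selector)         # e.g. span#price
    if m:
        tag, ident = m.groups()
        return rf'<{tag}[^>]*id="{ident}"[^>]*>(.*?)</{tag}>'
    raise ValueError("selector shape not handled in this sketch")

html = '<div class="article lead">Breaking news body</div><span id="price">42</span>'
for sel in ("div.article", "span#price"):
    rx = re.compile(selector_to_regex(sel), re.S)
    print(sel, "->", rx.search(html).group(1))
```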

Use of Machine Learning Algorithms In Classification Of DNA Gene Sequences: An Empiric Research on The Prometer And Splice Junction Data Set. Özhan, E.; and Uzun, E. In 2. International Icontech Symposium on Innovative Surveys in Positive Sciences, pages 144-157, 2020.

@inproceedings{Ozhan2020,
  title     = {Use of Machine Learning Algorithms In Classification Of DNA Gene Sequences: An Empiric Research on The Prometer And Splice Junction Data Set},
  author    = {Özhan, Erkan and Uzun, Erdinç},
  booktitle = {2. International Icontech Symposium on Innovative Surveys in Positive Sciences},
  year      = {2020},
  pages     = {144-157},
  address   = {Budapest, Hungary},
  keywords  = {DNA Gene Sequencing, Data Classification, Feature Reduction, Machine Learning}
}

Artificial intelligence technologies can provide effective solutions for understanding increasingly complex multi-dimensional data and obtaining meaningful information. In particular, as the number of attributes of the data and the complexity and size of the problem increase, the discovery of meaningful relationships and the comprehensibility and interpretation of the data become increasingly difficult. To overcome this difficulty, algorithms developed with machine learning methods, a sub-branch of artificial intelligence, can be useful. Classification of DNA gene sequences is one of the difficult problems to solve, especially in molecular biology. This study investigates the applicability of machine learning algorithms on two data sets that involve the DNA sequence analysis problem and the effects of new approaches on this problem. For this purpose, previously untested machine learning algorithms and tools were used and new findings were achieved. In particular, the Auto-Weka tool, which was developed to determine the algorithms that offer optimum performance together with their parameters, was found to be effective. The findings show that the performance rates previously obtained on these data sets can be improved with new approaches. In particular, it has been observed that the Random Forest and SMO (sequential minimal optimization) algorithms significantly increase classification performance rates compared to previous studies when appropriate parameter settings are determined. In addition to increasing the performance of the classification algorithms, this study also considers feature reduction, which had not been tried in previous research, and shows that the number of features can be reduced significantly while increasing the performance rate. In particular, the CfsSubsetEval algorithm was able to detect the most important gene sequences that affect the classification by significantly reducing the input parameters: it did so on the Promoter data with the GreedyStepwise search method, and it reduced the number of attributes in the Splice Junction data set to the optimum level with the BestFirst search method. Thus, hardware resources such as computation and storage can be used more efficiently in the processing of DNA gene sequences, and results can be obtained quickly.
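
The paper works with Weka tools (Auto-Weka, CfsSubsetEval, Random Forest, SMO). The sketch below shows an analogous, much simpler pipeline in scikit-learn on made-up sequences: one-hot encode fixed-length DNA strings, reduce the feature count, then classify. It is an illustration of the workflow, not the paper's setup or data.

```python
# Analogous pipeline in scikit-learn (the paper itself used Weka tools such as
# Auto-Weka, CfsSubsetEval, Random Forest and SMO): one-hot encode fixed-length
# DNA sequences, reduce the feature count, then classify. Toy data only.
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline

def one_hot(seq):
    table = {"a": [1, 0, 0, 0], "c": [0, 1, 0, 0], "g": [0, 0, 1, 0], "t": [0, 0, 0, 1]}
    return [bit for base in seq.lower() for bit in table[base]]

# Tiny made-up stand-in for the promoter data set: sequence -> promoter or not.
data = [("tactagcaat", 1), ("gcttgtcgta", 0), ("aattgtgatg", 1),
        ("cgggatcgcg", 0), ("ttatttgaac", 1), ("ccgcgggcgg", 0)]
X = [one_hot(seq) for seq, _ in data]
y = [label for _, label in data]

model = make_pipeline(
    SelectKBest(chi2, k=10),                 # crude stand-in for CfsSubsetEval
    RandomForestClassifier(n_estimators=200, random_state=0),
)
model.fit(X, y)
print(model.predict([one_hot("tactagcaat")]))
```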

Automatically Discovering Relevant Images From Web Pages. Uzun, E.; Ozhan, E.; Agun, H. V.; Yerlikaya, T.; and Bulus, H. N. IEEE Access, 8: 208910-208921. 2020.

@article{Uzun2020,
  title    = {Automatically Discovering Relevant Images From Web Pages},
  author   = {Uzun, Erdinc and Ozhan, Erkan and Agun, Hayri Volkan and Yerlikaya, Tarik and Bulus, Halil Nusret},
  journal  = {IEEE Access},
  year     = {2020},
  volume   = {8},
  pages    = {208910-208921},
  doi      = {10.1109/access.2020.3039044},
  url      = {https://ieeexplore.ieee.org/document/9262879/},
  keywords = {Crawlers, Feature extraction, Layout, Machine learning, Predictive models, Task analysis, Web pages}
}

Web pages contain irrelevant images along with relevant images. The classification of these images is an error-prone process due to the number of design variations of web pages. Using multiple web pages provides additional features that improve the performance of relevant image extraction. Traditional studies use the features extracted from a single web page. In this study, however, we enhance the performance of relevant image extraction by employing features extracted from different web pages consisting of standard news, galleries, video pages, and link pages. The dataset obtained from these web pages contains 100 different web pages for each of 200 online news websites from 58 different countries. For discovering relevant images, the most straightforward approach extracts the largest image on the web page; this approach achieves a 0.451 f-Measure score as a baseline. Then, we apply several machine learning methods using the features in this dataset to find the most suitable one. The best f-Measure score is 0.822 using the AdaBoost classifier. Some of these features have been utilized in previous web data extraction studies; to the best of our knowledge, 15 new features are proposed for the first time in this study for discovering relevant images. We compare the performance of the AdaBoost classifier on different feature sets. The proposed features improve the f-Measure by 35 percent, and using only the cache feature, which is the most prominent feature, accounts for 7 percent of this improvement.
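
The baseline mentioned in the abstract simply takes the largest image on the page. A minimal sketch of that baseline is shown below, using only the width/height attributes present in the HTML; the paper's actual features are richer and partly come from other pages of the same site.

```python
# Minimal sketch of the abstract's baseline: pick the largest image on a page,
# judged only by the width/height attributes written in the HTML.
from bs4 import BeautifulSoup

def largest_image(html):
    soup = BeautifulSoup(html, "html.parser")
    best_src, best_area = None, -1
    for img in soup.find_all("img"):
        try:
            area = int(img.get("width", 0)) * int(img.get("height", 0))
        except ValueError:          # non-numeric attributes such as "100%"
            continue
        if area > best_area:
            best_src, best_area = img.get("src"), area
    return best_src

page = ('<img src="/logo.png" width="120" height="40">'
        '<img src="/news/photo.jpg" width="800" height="600">')
print(largest_image(page))          # -> /news/photo.jpg
```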

2019 (2)

Predicting CO and NOx emissions from gas turbines: Novel data and a benchmark PEMS. Kaya, H.; Tüfekci, P.; and Uzun, E. Turkish Journal of Electrical Engineering and Computer Sciences, 27(6): 4783-4796. November 2019.

@article{Kaya2019,
  title    = {Predicting CO and NOx emissions from gas turbines: Novel data and a benchmark PEMS},
  author   = {Kaya, Heysem and Tüfekci, Pınar and Uzun, Erdinç},
  journal  = {Turkish Journal of Electrical Engineering and Computer Sciences},
  year     = {2019},
  month    = {11},
  volume   = {27},
  number   = {6},
  pages    = {4783-4796},
  doi      = {10.3906/ELK-1807-87},
  url      = {http://online.journals.tubitak.gov.tr/openDoiPdf.htm?mKodu=elk-1807-87},
  keywords = {CO, Database, Exhaust emission prediction, Extreme learning machine, Gas turbines, NOx, Predictive emission monitoring systems}
}

Predictive emission monitoring systems (PEMS) are important tools for validation and backing up of costly continuous emission monitoring systems used in gas-turbine-based power plants. Their implementation relies on the availability of appropriate and ecologically valid data. In this paper, we introduce a novel PEMS dataset collected over five years from a gas turbine for the predictive modeling of the CO and NOx emissions. We analyze the data using a recent machine learning paradigm, and present useful insights about emission predictions. Furthermore, we present a benchmark experimental procedure for comparability of future works on the data.
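
A minimal sketch of a PEMS-style baseline is shown below: predict NOx from ambient and process variables with an off-the-shelf regressor. The file name and column names are assumptions about a locally saved copy of the data, the random split ignores the paper's benchmark protocol, and the regressor is not the extreme learning machine the authors use.

```python
# Minimal sketch of a PEMS-style baseline: learn NOx from ambient and process
# variables with a standard regressor. The file name and column names below
# are assumptions about a locally saved CSV, not guaranteed to match the
# released data exactly.
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

df = pd.read_csv("gt_emissions.csv")               # hypothetical local copy
X = df.drop(columns=["CO", "NOX"])                 # predictors: ambient/process vars
y = df["NOX"]

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)
model = RandomForestRegressor(n_estimators=300, random_state=0).fit(X_tr, y_tr)
print("MAE:", mean_absolute_error(y_te, model.predict(X_te)))
```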

Turkish Datasets for Text Genre Identification. Tüfekci, P.; Uzun, E.; and Bektaş, M. In 2nd International Conference on Data Science and Applications (ICONDATA’19), pages 72-75, 2019.

@inproceedings{Tufekci2019,
  title     = {Turkish Datasets for Text Genre Identification},
  author    = {Tüfekci, Pınar and Uzun, Erdinç and Bektaş, Melike},
  booktitle = {2nd International Conference on Data Science and Applications (ICONDATA’19)},
  year      = {2019},
  pages     = {72-75},
  address   = {Balıkesir, Turkey},
  url       = {https://www.researchgate.net/publication/338232364_Metin_Turu_Belirleme_icin_Turkce_Veri_Setleri_Turkish_Datasets_for_Text_Genre_Identification},
  keywords  = {Genre Identification, Text Classification, Turkish Dataset}
}

In this study, seven large data sets were created for modeling studies addressing genre identification, a text classification problem, for the Turkish language; these data sets consist of all past columns written by the columnists of a newspaper up to 24.04.2019. The data sets, created for determining the text genre of the columnists, contain 8 classes with 24,344 texts; 7 classes with 22,442 texts; 6 classes with 20,160 texts; 5 classes with 19,325 texts; 4 classes with 16,328 texts; 3 classes with 14,334 texts; and 2 classes with 11,698 texts. These data sets were evaluated with the Naive Bayes and Random Forest classifiers, which are classical machine learning algorithms. The most successful result, 95.45% accuracy, was obtained with the Random Forest classifier on the data set consisting of 2 classes with 5,849 texts in each class and 11,698 texts in total.
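
The abstract describes a classical text classification setup with Naive Bayes and Random Forest. The sketch below shows that kind of pipeline with a TF-IDF bag of words on placeholder Turkish snippets; the real column data sets are not reproduced here.

```python
# Illustrative sketch only (the data sets themselves are not reproduced here):
# the classical bag-of-words pipeline the abstract refers to, with the two
# classifiers it names. Column texts and genre labels are placeholders.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline

texts = ["Borsa bugün yükselişle kapandı ...",          # placeholder column excerpts
         "Takım dün akşam farklı bir galibiyet aldı ...",
         "Merkez bankası faiz kararını açıkladı ...",
         "Maçın ilk yarısında iki gol vardı ..."]
labels = ["ekonomi", "spor", "ekonomi", "spor"]

for clf in (MultinomialNB(), RandomForestClassifier(n_estimators=200, random_state=0)):
    model = make_pipeline(TfidfVectorizer(), clf).fit(texts, labels)
    print(type(clf).__name__, model.predict(["Dolar kuru ve enflasyon verileri ..."]))
```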

2018 (8)

Veritabanı Tasarımının Yazılım Performansına Etkisi: Normalizasyona karşı Denormalizasyon [The Effect of Database Design on Software Performance: Normalization versus Denormalization]. Uzun, E.; Buluş, H. N.; and Erdoğan, C. Süleyman Demirel Üniversitesi Fen Bilimleri Enstitüsü Dergisi, 22(2): 887. February 2018.

@article{Uzun2018_SDFBD,
  title    = {Veritabanı Tasarımının Yazılım Performansına Etkisi: Normalizasyona karşı Denormalizasyon},
  author   = {Uzun, Erdinç and Buluş, Halil Nusret and Erdoğan, Cihat},
  journal  = {Süleyman Demirel Üniversitesi Fen Bilimleri Enstitüsü Dergisi},
  year     = {2018},
  month    = {2},
  volume   = {22},
  number   = {2},
  pages    = {887},
  doi      = {10.19113/sdufbed.02548},
  url      = {https://dergipark.org.tr/tr/download/article-file/529039},
  keywords = {CAP theorem, Database design, Denormalization, Indexing, Normalization, Software performance}
}

One of the most important factors affecting software performance is the improvements that can be made in database design. The normalization process, which is based on relational database theory, is often used in database design. However, as the amount of data increases, performance problems arise due to the normalization process. To overcome these performance problems, denormalization, which has no theoretical basis, is utilized. In this study, a performance-enhancing database design is introduced for a survey application, and the performance improvements on three different relational database management systems, MySQL, PostgreSQL, and Oracle, are examined. In addition, the CAP theorem is used to explain when to switch to NoSQL, one of today's popular database systems, and where the normalization and denormalization processes fit within this theorem.
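
A tiny, self-contained illustration of the trade-off discussed in the abstract, with an invented survey schema: the normalized design answers a report query through a join, while the denormalized table answers the same question from one table at the cost of redundancy and harder updates.

```python
# Invented survey schema for illustration only: compare a normalized design
# (join at query time) with a denormalized one (question text repeated on
# every answer row).
import sqlite3

db = sqlite3.connect(":memory:")
db.executescript("""
    -- normalized: answers reference questions by id
    CREATE TABLE question (id INTEGER PRIMARY KEY, text TEXT);
    CREATE TABLE answer   (id INTEGER PRIMARY KEY, question_id INTEGER, value TEXT);
    -- denormalized: question text repeated on every answer row
    CREATE TABLE answer_flat (id INTEGER PRIMARY KEY, question_text TEXT, value TEXT);

    INSERT INTO question VALUES (1, 'Favorite DBMS?');
    INSERT INTO answer VALUES (1, 1, 'PostgreSQL'), (2, 1, 'MySQL');
    INSERT INTO answer_flat VALUES (1, 'Favorite DBMS?', 'PostgreSQL'),
                                   (2, 'Favorite DBMS?', 'MySQL');
""")

# The same report under the two designs:
print(db.execute("""SELECT q.text, a.value
                    FROM answer a JOIN question q ON q.id = a.question_id""").fetchall())
print(db.execute("SELECT question_text, value FROM answer_flat").fetchall())
```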

Object-based Entity Relationship Diagram Drawing Library: EntRel.JS. Uzun, E.; Yerlikaya, T.; and Kırat, O. In 7th International Scientific Conference “TechSys 2018” – Engineering, Technologies and Systems, Technical University of Sofia, Plovdiv Branch, May 17-19, pages 114-119, 2018.

@inproceedings{Uzun2018_01_Plovdiv,
  title     = {Object-based Entity Relationship Diagram Drawing Library: EntRel.JS},
  author    = {Uzun, Erdinç and Yerlikaya, Tarık and Kırat, Oğuz},
  booktitle = {7th International Scientific Conference “TechSys 2018” – Engineering, Technologies and Systems, Technical University of Sofia, Plovdiv Branch May 17-19},
  year      = {2018},
  pages     = {114-119},
  url       = {https://erdincuzun.com/wp-content/uploads/download/plovdiv_2018_02.pdf},
  keywords  = {Entity relationship diagrams, JavaScript, SVG}
}

An entity relationship diagram (ERD) is a visual aid for database design. An ERD gives information about the relations of entity sets and the logical structure of databases. In this paper, we introduce an open source JavaScript library named EntRel.JS for designing sophisticated ERDs by writing simple code. This library, which we have developed to facilitate this task, is based on our obfc.js library. It generates SVG output on the client side for modern browsers. The SVG output provides storage efficiency when compared to ERD drawings created with existing drawing applications. Moreover, we present our animation library for animating elements on a web page.

Comparison of Python Libraries used for Web Data Extraction. Uzun, E.; Yerlikaya, T.; and Kırat, O. In 7th International Scientific Conference “TechSys 2018” – Engineering, Technologies and Systems, Technical University of Sofia, Plovdiv Branch, May 17-19, pages 108-113, 2018.

@inproceedings{Uzun2018_Plovdiv,
  title     = {Comparison of Python Libraries used for Web Data Extraction},
  author    = {Uzun, Erdinç and Yerlikaya, Tarık and Kırat, Oğuz},
  booktitle = {7th International Scientific Conference “TechSys 2018” – Engineering, Technologies and Systems, Technical University of Sofia, Plovdiv Branch May 17-19},
  year      = {2018},
  pages     = {108-113},
  url       = {https://erdincuzun.com/wp-content/uploads/download/plovdiv_2018_01.pdf},
  keywords  = {DOM, Performance evaluation, Python, Web content extraction}
}

There are several libraries for extracting useful data from web pages in Python. In this study, we compare three different well-known extraction libraries: BeautifulSoup, lxml, and regex. The experimental results indicate that regex achieves the best results, with an average of 0.071 ms. However, it is difficult to generate correct extraction rules for regex when the number of inner elements is not known. In the experiments, only 43.5% of the extraction rules are suitable for this task. In this case, BeautifulSoup and lxml, which are DOM-based libraries, are used for the extraction process. In the experiments, the lxml library yields the best results, with an average of 9.074 ms.
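
The comparison can be re-created in miniature as below; the HTML fragment and the extraction target are invented, and absolute timings will differ by machine and page, so the numbers reported in the paper should not be expected to reproduce exactly.

```python
# A small re-creation of the comparison described above: extract the article
# title with regex, BeautifulSoup and lxml, timing each approach.
import re
import timeit
from bs4 import BeautifulSoup
from lxml import html as lxml_html

page = "<html><body><h1 class='title'>Sample headline</h1><p>Body text</p></body></html>"

def with_regex():
    return re.search(r"<h1[^>]*>(.*?)</h1>", page).group(1)

def with_bs4():
    return BeautifulSoup(page, "html.parser").find("h1").get_text()

def with_lxml():
    return lxml_html.fromstring(page).xpath("//h1/text()")[0]

for fn in (with_regex, with_bs4, with_lxml):
    ms = timeit.timeit(fn, number=1000) / 1000 * 1000
    print(f"{fn.__name__}: {fn()!r}, {ms:.3f} ms per call")
```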

Comparison of Python Libraries used for Web Data Extraction. Uzun, E.; Yerlikaya, T.; and Kırat, O. Journal of the Technical University - Sofia Plovdiv branch, Bulgaria, 24: 87-92. 2018.

@article{Uzun2018b,
  title   = {Comparison of Python Libraries used for Web Data Extraction},
  author  = {Uzun, Erdinç and Yerlikaya, Tarık and Kırat, Oğuz},
  journal = {Journal of the Technical University - Sofia Plovdiv branch, Bulgaria},
  year    = {2018},
  volume  = {24},
  pages   = {87-92},
  url     = {https://erdincuzun.com/wp-content/uploads/download/plovdiv_journal_2018_01.pdf}
}

There are several libraries for extracting useful data from web pages in Python. In this study, we compare three different well-known extraction libraries: BeautifulSoup, lxml, and regex. The experimental results indicate that regex achieves the best results, with an average of 0.071 ms. However, it is difficult to generate correct extraction rules for regex when the number of inner elements is not known. In the experiments, only 43.5% of the extraction rules are suitable for this task. In this case, BeautifulSoup and lxml, which are DOM-based libraries, are used for the extraction process. In the experiments, the lxml library yields the best results, with an average of 9.074 ms.

Object-based Entity Relationship Diagram Drawing Library: EntRel.JS. Uzun, E.; Yerlikaya, T.; and Kırat, O. Journal of the Technical University - Sofia Plovdiv branch, Bulgaria, 24: 93-98. 2018.
@article{\n title = {Object-based Entity Relationship Diagram Drawing Library: EntRel.JS},\n type = {article},\n year = {2018},\n pages = {93-98},\n volume = {24},\n websites = {https://erdincuzun.com/wp-content/uploads/download/plovdiv_journal_2018_02.pdf},\n id = {4f85f553-926d-35ce-b83e-c9948b5dc8f3},\n created = {2019-01-17T06:48:08.218Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2021-02-21T14:00:57.410Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2018a},\n private_publication = {false},\n abstract = {An entity relationship diagram (ERD) is a visual helper for database design. ERD gives information about the relations of entity sets and the logical structure of databases. In this paper, we introduce an open source JavaScript Library named EntRel.JS in order to design sophisticated ERDs by writing simple codes. This library, which we have developed to facilitate this task, is based on our obfc.js library. It generates a SVG output on the client side for modern browsers. The SVG output provides storage efficiency when compared to existing ERD drawings created with existing drawing applications. Moreover, we present our animation library to gain action for elements in your web page.},\n bibtype = {article},\n author = {Uzun, Erdinç and Yerlikaya, Tarık and Kırat, Oğuz},\n journal = {Journal of the Technical University - Sofia Plovdiv branch, Bulgaria}\n}
An entity relationship diagram (ERD) is a visual helper for database design. An ERD gives information about the relations of entity sets and the logical structure of databases. In this paper, we introduce an open-source JavaScript library named EntRel.JS for designing sophisticated ERDs by writing simple code. This library, which we have developed to facilitate this task, is based on our obfc.js library. It generates an SVG output on the client side for modern browsers. The SVG output provides storage efficiency when compared with ERD drawings created with existing drawing applications. Moreover, we present our animation library for adding motion to elements in a web page.
Examining the Impact of Feature Selection on Classification of User Reviews in Web Pages. Uzun, E.; and Özhan, E. In 2018 International Conference on Artificial Intelligence and Data Processing, IDAP 2018, pages 430-437, September 2018. IEEE.
@inproceedings{\n title = {Examining the Impact of Feature Selection on Classification of User Reviews in Web Pages},\n type = {inproceedings},\n year = {2018},\n keywords = {classification methods,feature selection,imbalanced dataset,review layout detection,web data extraction},\n pages = {430-437},\n websites = {https://ieeexplore.ieee.org/document/8620774/},\n month = {9},\n publisher = {IEEE},\n city = {Malatya, Turkey},\n id = {122b4e2c-e9ef-326e-9cee-7684cbb746b8},\n created = {2019-01-17T06:52:15.704Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2022-04-08T18:45:10.149Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2018},\n private_publication = {false},\n abstract = {The user reviews in web pages can provide useful information about the content of the web page for text processing applications. Automatically extracting data from a web page is a crucial process for these applications. One of the used methods in this process is to construct a learning model with an appropriate classification method using features that are derived from data. However, some features can be either redundant or irrelevant for this model. In this study, an imbalanced dataset including 47 shallow text features obtained from web pages is utilized for extracting of the user reviews. Then, various well-known feature selection techniques are applied to reduce the number of these features. The effects of this reduction on the classification methods are also examined. The experimental results indicate that approximately half of the features are sufficient for the classification task. Additionally, the AdaBoost classifier gives the best results concerning precision of about 0.930 for the review layout prediction.},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç and Özhan, Erkan},\n doi = {10.1109/IDAP.2018.8620774},\n booktitle = {2018 International Conference on Artificial Intelligence and Data Processing, IDAP 2018}\n}
The user reviews in web pages can provide useful information about the content of the web page for text processing applications. Automatically extracting data from a web page is a crucial process for these applications. One common method is to construct a learning model with an appropriate classification method using features derived from the data. However, some features can be either redundant or irrelevant for this model. In this study, an imbalanced dataset including 47 shallow text features obtained from web pages is utilized for extracting the user reviews. Then, various well-known feature selection techniques are applied to reduce the number of these features, and the effects of this reduction on the classification methods are examined. The experimental results indicate that approximately half of the features are sufficient for the classification task. Additionally, the AdaBoost classifier gives the best results, with a precision of about 0.930 for the review layout prediction.
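As a rough sketch of that experimental setup, feature selection can be chained with AdaBoost in a scikit-learn pipeline; the synthetic matrix below merely stands in for the paper's 47 shallow text features and its review/non-review labels:

# Sketch: keep roughly half of 47 features, then classify with AdaBoost.
# The data here is synthetic; the paper uses shallow text features from web pages.
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

rng = np.random.default_rng(42)
X = rng.normal(size=(600, 47))                  # placeholder feature matrix
y = (X[:, 0] + 0.5 * X[:, 1] > 0).astype(int)   # placeholder review / non-review labels

# Keep roughly half of the features, as the paper reports is sufficient.
model = make_pipeline(SelectKBest(f_classif, k=24), AdaBoostClassifier())
print("mean precision:", cross_val_score(model, X, y, cv=5, scoring="precision").mean())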
Performance Evaluation of Classification Methods in Layout Prediction of Web Pages. Özhan, E.; and Uzun, E. In International Conference on Artificial Intelligence and Data Processing (IDAP 2018), pages 438-444, 2018.
@inproceedings{\n title = {Performance Evaluation of Classification Methods in Layout Prediction of Web Pages},\n type = {inproceedings},\n year = {2018},\n pages = {438-444},\n websites = {https://ieeexplore.ieee.org/document/8620893},\n city = {Malatya, Turkey},\n id = {b9578d17-9e8e-36d2-81cf-7b53b73809ec},\n created = {2019-01-17T06:54:23.448Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-10-23T11:10:59.187Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Ozhan2018},\n private_publication = {false},\n abstract = {The Web is an invaluable source of data stored on web pages. These data are contained in HTML layout elements of a web page. It is a crucial issue to extract data automatically from a web page. In this study, a dataset, which is annotated with seven different layouts including main content, headline, summary, other necessary layouts, menu, link, and other unnecessary layouts, is used. Then, 49 different features are computed from these layouts. Finally, we compare the different classification methods for evaluating the performance of these methods in layout prediction. The experiments show that the Random Forest classifier achieves a high accuracy of 98.46%. Thanks to this classifier, the prediction of link layout has a higher performance (approximately 0.988 f-Measure) according to the performance of the prediction of other layouts. On the other hand, the prediction of the summary layout has the worst performance with about 0.882 f-Measure.},\n bibtype = {inproceedings},\n author = {Özhan, Erkan and Uzun, Erdinç},\n doi = {https://doi.org/10.1109/IDAP.2018.8620893},\n booktitle = {International Conference on Artificial Intelligence and Data Processing (IDAP 2018)}\n}
The Web is an invaluable source of data stored on web pages. These data are contained in the HTML layout elements of a web page, and extracting them automatically is a crucial issue. In this study, a dataset annotated with seven different layouts (main content, headline, summary, other necessary layouts, menu, link, and other unnecessary layouts) is used. Then, 49 different features are computed from these layouts. Finally, we compare different classification methods to evaluate their performance in layout prediction. The experiments show that the Random Forest classifier achieves a high accuracy of 98.46%. With this classifier, the prediction of the link layout has the highest performance (approximately 0.988 f-Measure) compared with the other layouts, while the prediction of the summary layout has the worst performance, with about 0.882 f-Measure.
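A minimal sketch of this kind of evaluation, with synthetic data standing in for the paper's 49 features and seven layout classes, is shown below; classification_report prints the per-class f-measures the abstract refers to:

# Sketch: multi-class layout prediction with a Random Forest and per-class f-measure.
# Features and labels are synthetic placeholders for the paper's 49 features / 7 layouts.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1400, n_features=49, n_informative=20,
                           n_classes=7, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

clf = RandomForestClassifier(n_estimators=200, random_state=0).fit(X_train, y_train)
print(classification_report(y_test, clf.predict(X_test)))  # per-class precision/recall/f1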
Web Veri Çıkarımda Çıkarım Kurallarının İncelenmesi [Examining Extraction Rules in Web Data Extraction]. Uzun, E.; Yerlikaya, T.; and Kırat, O. European Journal of Engineering and Applied Sciences, 1(2): 72-77. December 2018.
@article{\n title = {Web Veri Çıkarımda Çıkarım Kurallarının İncelenmesi},\n type = {article},\n year = {2018},\n pages = {72-77},\n volume = {1},\n websites = {https://dergipark.org.tr/en/pub/ejeas/issue/41931/486132},\n month = {12},\n day = {30},\n id = {1dd55ee6-4231-36c8-8458-4036b2f34f91},\n created = {2020-04-10T14:21:02.101Z},\n accessed = {2020-04-10},\n file_attached = {true},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2021-02-21T14:00:57.229Z},\n read = {true},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {UZUN2018},\n private_publication = {false},\n abstract = {Extracting the desired data from the web page is important issue for applications in the fields of data mining and information retrieval. DOM-based methods or regular expressions can be used to extract data from a web page. For this extraction process, multiple extraction rules can be prepared for both DOM-based methods and regular expressions. In this study, the effectiveness of obtaining more than one data with extraction rules is investigated. As a data set, fifteen websites including in the fields of news, film and shopping have been selected. Extraction rule files have been created for data extraction with different extraction techniques for these websites. Web sites are mainly focused on repetitive data such as reviews. Experiments have shown that regular expressions, the creation process is more laborious and time consuming, give better results than DOM-based methods. Among the DOM-based methods, the lxml parser library provided the best results as expected. Experiments indicate that the extraction rules prepared by a developer affect the extraction time. As a result, it is possible to extract the desired data much faster in web pages with the well-prepared regular expressions.},\n bibtype = {article},\n author = {UZUN, Erdinç and Yerlikaya, Tarık and Kırat, Oğuz},\n journal = {European Journal of Engineering and Applied Sciences},\n number = {2}\n}
Extracting the desired data from a web page is an important issue for applications in data mining and information retrieval. DOM-based methods or regular expressions can be used to extract data from a web page, and multiple extraction rules can be prepared for both. In this study, the effectiveness of obtaining more than one data item with extraction rules is investigated. As a dataset, fifteen websites in the fields of news, film and shopping have been selected, and extraction rule files have been created for these websites with different extraction techniques. The websites mainly focus on repetitive data such as reviews. Experiments have shown that regular expressions, whose creation process is more laborious and time-consuming, give better results than DOM-based methods. Among the DOM-based methods, the lxml parser library provided the best results, as expected. The experiments also indicate that the extraction rules prepared by a developer affect the extraction time. As a result, it is possible to extract the desired data much faster from web pages with well-prepared regular expressions.
2017 (4)
Object-based flowchart drawing library. Uzun, E.; and Buluş, H. N. In 2017 International Conference on Computer Science and Engineering (UBMK), pages 110-115, 2017.
@inproceedings{\n title = {Object-based flowchart drawing library},\n type = {inproceedings},\n year = {2017},\n keywords = {Flowchart,Javascript,SVG},\n pages = {110-115},\n websites = {http://ieeexplore.ieee.org/document/8093574/},\n id = {926dda1a-7712-30f0-b302-6de176b48bc9},\n created = {2018-03-16T13:30:30.555Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2018-07-04T12:59:47.002Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2017a},\n private_publication = {false},\n abstract = {While flow charts are one of the best ways to describe a computer program, drawing process is a laborious task for developers. This study describes an open source Javascript library named obfc.js, which we have developed to facilitate this task. This library generates SVG output on the client side for modern browsers and allows easy creation of diagrams and links. Moreover, it allows you to link click events to objects and links. This library will allow you to design sophisticated flow charts using a very small amount of text data instead of both large image and SVG Data. © 2017 IEEE.},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç and Buluş, H. Nusret},\n doi = {10.1109/UBMK.2017.8093574},\n booktitle = {2017 International Conference on Computer Science and Engineering (UBMK)}\n}
While flow charts are one of the best ways to describe a computer program, the drawing process is a laborious task for developers. This study describes an open-source JavaScript library named obfc.js, which we have developed to facilitate this task. The library generates SVG output on the client side for modern browsers and allows easy creation of diagrams and links. Moreover, it allows click events to be attached to objects and links. The library makes it possible to design sophisticated flow charts using a very small amount of text data instead of large images or raw SVG data. © 2017 IEEE.
A novel algorithm for extracting the user reviews from web pages. Uçar, E.; Uzun, E.; and Tüfekci, P. Journal of Information Science, 43(5): 696-712. October 2017.
@article{\n title = {A novel algorithm for extracting the user reviews from web pages},\n type = {article},\n year = {2017},\n keywords = {Efficient extraction,web data extraction,web user reviews},\n pages = {696-712},\n volume = {43},\n websites = {http://journals.sagepub.com/doi/10.1177/0165551516666446},\n month = {10},\n id = {95648a3d-c8d0-34be-a04a-fe1aa8f3ad56},\n created = {2018-03-16T13:30:30.629Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2018-07-04T12:59:46.783Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Ucar2017},\n folder_uuids = {6d42ffd6-e28f-468b-89db-871815ea39c8},\n private_publication = {false},\n abstract = {Extracting the user reviews in websites such as forums, blogs, newspapers, commerce, trips, etc. is crucial for text processing applications (e.g. sentiment analysis, trend detection/monitoring and recommendation systems) which are needed to deal with structured data. Traditional algorithms have three processes consisting of Document Object Model (DOM) tree creation, extraction of features obtained from this tree and machine learning. However, these algorithms increase time complexity of extraction process. This study proposes a novel algorithm that involves two complementary stages. The first stage determines which HTML tags correspond to review layout for a web domain by using the DOM tree as well as its features and decision tree learning. The second stage extracts review layout for web pages in a web domain using the found tags obtained from the first stage. This stage is more time-efficient, being approximately 21 times faster compared to the first stage. Moreover, it achieves a relatively high accuracy of 96.67% in our experiments of review block extraction. © Chartered Institute of Library and Information Professionals.},\n bibtype = {article},\n author = {Uçar, Erdem and Uzun, Erdinç and Tüfekci, Pınar},\n doi = {10.1177/0165551516666446},\n journal = {Journal of Information Science},\n number = {5}\n}
Extracting the user reviews in websites such as forums, blogs, newspapers, commerce and trips is crucial for text processing applications (e.g. sentiment analysis, trend detection/monitoring and recommendation systems) which need to deal with structured data. Traditional algorithms have three processes consisting of Document Object Model (DOM) tree creation, extraction of features obtained from this tree, and machine learning. However, these algorithms increase the time complexity of the extraction process. This study proposes a novel algorithm that involves two complementary stages. The first stage determines which HTML tags correspond to the review layout for a web domain by using the DOM tree, its features and decision tree learning. The second stage extracts the review layout for web pages in a web domain using the tags found in the first stage. This stage is more time-efficient, being approximately 21 times faster than the first stage. Moreover, it achieves a relatively high accuracy of 96.67% in our experiments on review block extraction. © Chartered Institute of Library and Information Professionals.
Comparison of string matching algorithms in web documents. Buluş, H. N.; Uzun, E.; and Doruk, A. In International Scientific Conference’2017 (UNITECH’17), volume 2, pages 279-282, 2017.
@inproceedings{\n title = {Comparison of string matching algorithms in web documents},\n type = {inproceedings},\n year = {2017},\n pages = {279-282},\n volume = {2},\n websites = {https://erdincuzun.com/wp-content/uploads/download/s5_p256.pdf},\n id = {dffea588-b01f-375c-8cfa-1c75a4cdebef},\n created = {2018-06-05T12:53:51.315Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-01-16T20:29:39.508Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Bulus2017_G1},\n private_publication = {false},\n abstract = {String matching algorithms try to find position/s where one or more patterns (also called strings) are occurred in text. In this study, we compare 31 different pattern matching algorithms in web documents. In web documents, searching is crucial process for content extraction process. Therefore, lengths of html tags are examined for determining which algorithm or algorithms are suitable for matching process. Our experiments show that Skip Search algorithm is the best pattern matching algorithm with 0.170 ms for web documents. Moreover, it has 0.002 ms in preprocessing time and 0.168 ms in searching time.},\n bibtype = {inproceedings},\n author = {Buluş, Halil Nusret and Uzun, Erdinç and Doruk, Alpay},\n booktitle = {International Scientific Conference’2017 (UNITECH’17)},\n keywords = {.Net,Performance evaluation,String matching algorithms}\n}
String matching algorithms try to find the position(s) where one or more patterns (also called strings) occur in a text. In this study, we compare 31 different pattern matching algorithms on web documents. In web documents, searching is a crucial step of the content extraction process; therefore, the lengths of HTML tags are examined to determine which algorithm or algorithms are suitable for matching. Our experiments show that the Skip Search algorithm is the best pattern matching algorithm for web documents, at 0.170 ms overall (0.002 ms of preprocessing time and 0.168 ms of searching time).
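Skip Search itself is not available in the Python standard library, but the flavor of such a comparison can be sketched with a hand-written Boyer-Moore-Horspool search (a related bad-character-shift algorithm) against the built-in str.find; the HTML sample and the pattern are placeholders, and a pure-Python implementation is of course much slower than the C-level built-in, so the point is the algorithm's structure rather than the absolute timings:

# Sketch: a simple Boyer-Moore-Horspool search compared with str.find.
import timeit

def horspool(text: str, pattern: str) -> int:
    """Return the index of the first occurrence of pattern in text, or -1."""
    m, n = len(pattern), len(text)
    if m == 0:
        return 0
    if m > n:
        return -1
    # Bad-character shift table: distance from each character to the pattern end.
    shift = {pattern[i]: m - 1 - i for i in range(m - 1)}
    i = m - 1
    while i < n:
        k = 0
        while k < m and text[i - k] == pattern[m - 1 - k]:
            k += 1
        if k == m:
            return i - m + 1
        i += shift.get(text[i], m)
    return -1

text = "<html>" + "<div class='x'>filler</div>" * 2000 + "<span id='target'>hit</span></html>"
pattern = "<span id='target'>"

assert horspool(text, pattern) == text.find(pattern)
for call in ("horspool(text, pattern)", "text.find(pattern)"):
    ms = timeit.timeit(call, globals=globals(), number=100) / 100 * 1000
    print(f"{call:28s} {ms:.3f} ms")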
Evaluation of Hap, AngleSharp and HtmlDocument in web content extraction. Uzun, E.; Buluş, H. N.; Doruk, A.; and Özhan, E. In International Scientific Conference’2017 (UNITECH’17), volume 2, pages 275-278, 2017.
@inproceedings{\n title = {Evaluation of Hap, AngleSharp and HtmlDocument in web content extraction},\n type = {inproceedings},\n year = {2017},\n keywords = {.NET C#,DOM,HTML,Web Extraction Performance},\n pages = {275-278},\n volume = {2},\n websites = {https://erdincuzun.com/wp-content/uploads/download/s5_p255.pdf},\n id = {c4e12fa9-8c76-3455-b183-fda4a673dea8},\n created = {2018-06-05T12:53:51.408Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-01-16T20:29:39.192Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2017_G1},\n private_publication = {false},\n abstract = {With the DOM, programming languages can access and change all the HTML elements of a web page. There are several libraries for instantiating the DOM. In this study, we compare three different well-known .NET libraries, including HAP (Html Agility Pack), AngleSharp and MS_HtmlDocument to extract content from web pages. The experimental results indicate that AngleSharp achieves the best results with average 5.54 ms for preprocessing of the DOM and average 0.46 ms for extracting of a content from the DOM.},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç and Buluş, Halil Nusret and Doruk, Alpay and Özhan, Erkan},\n booktitle = {International Scientific Conference’2017 (UNITECH’17)}\n}
With the DOM, programming languages can access and change all the HTML elements of a web page, and there are several libraries for instantiating the DOM. In this study, we compare three well-known .NET libraries, HAP (Html Agility Pack), AngleSharp and MS_HtmlDocument, for extracting content from web pages. The experimental results indicate that AngleSharp achieves the best results, with an average of 5.54 ms for preprocessing the DOM and an average of 0.46 ms for extracting a content block from the DOM.
2016 (2)
Hiyerarşik kümeleme modeli kullanan web tabanlı bir ödev değerlendirme sistemi [A web-based assignment evaluation system using a hierarchical clustering model]. Uzun, E.; Erdoğan, C.; and Saygılı, A. Ejovoc (Electronic Journal of Vocational Colleges). 2016.
@article{\n title = {Hiyerarşik kümeleme modeli kullanan web tabanlı bir ödev değerlendirme sistemi},\n type = {article},\n year = {2016},\n keywords = {Document similarity,Hierarchical Clustering,Plagiarism Detection,Software Development},\n websites = {http://dergipark.gov.tr/ejovoc/issue/36634/417046#article_cite},\n id = {e97db021-5217-3be0-b67b-3d0eb7285d1c},\n created = {2018-06-05T12:53:51.408Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2018-07-04T13:37:09.476Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2016_Ejovoc},\n private_publication = {false},\n abstract = {Assignments are one of the most important parts of education process of students. In the classical assignment evaluation process, an assignment can be evaluated whether it is correct or not. However, for the assignments to give better contribution to education, plagiarisms committed by students should be considered. Detection of plagiarism and its extent are extremely difficult assignment evaluation procedures. In this study, in order to facilitate this procedure, a web-based application, which can combine document similarity measures with hierarchical clustering model, is introduced. This application gives the opportunity to evaluate which students submit similar assignments and the assignments’ similarity degree. Cosine, Dice and Jaccard similarity measures have been investigated in terms of document similarity calculation of this application. On the other hand, three different algorithms including Single Linkage, Complete Linkage and Average Group are examined in hierarchical clustering side. Test data which covers two education period of previous years and contains 54 different assignments of 18 different courses of 6 lecturers, are created. By using document similarity methods and hierarchical clustering algorithms, 9 different cophenetic correlation coefficients are obtained for each assignment and cophenetic correlation coefficients are calculated to test how well hierarchical clustering algorithms are . When the results were analyzed, it was discovered that Jaccard measure in document similarity and Average Group algorithm in hierarchical clustering is the best matching assignment evaluation pair.},\n bibtype = {article},\n author = {Uzun, Erdinç and Erdoğan, Cihat and Saygılı, Ahmet},\n journal = {Ejovoc (Electronic Journal of Vocational Colleges)}\n}
Assignments are one of the most important parts of a student's education. In the classical evaluation process, an assignment is only evaluated as correct or not. However, for assignments to contribute more to education, plagiarism committed by students should also be considered, and detecting plagiarism and its extent is an extremely difficult part of assignment evaluation. In this study, a web-based application that combines document similarity measures with a hierarchical clustering model is introduced to facilitate this procedure. The application shows which students submitted similar assignments and the degree of similarity. Cosine, Dice and Jaccard similarity measures are investigated for the document similarity calculation, while three algorithms, Single Linkage, Complete Linkage and Average Group, are examined on the hierarchical clustering side. Test data covering two previous academic terms and containing 54 different assignments from 18 different courses of 6 lecturers were created. For each assignment, 9 different cophenetic correlation coefficients are obtained from the combinations of document similarity methods and hierarchical clustering algorithms, and these coefficients are used to test how well the hierarchical clustering algorithms perform. The analysis shows that the Jaccard measure for document similarity and the Average Group algorithm for hierarchical clustering form the best-matching pair for assignment evaluation.
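The combination the study found to work best (Jaccard similarity with average linkage, judged by the cophenetic correlation coefficient) can be sketched with SciPy; the "assignments" below are tiny boolean token vectors invented for illustration:

# Sketch: Jaccard distances between assignments, average-linkage clustering,
# and the cophenetic correlation coefficient used to judge the clustering.
import numpy as np
from scipy.cluster.hierarchy import cophenet, linkage
from scipy.spatial.distance import pdist

# Toy "assignments" as binary presence vectors over a shared token vocabulary.
assignments = np.array([
    [1, 1, 0, 1, 0, 1],
    [1, 1, 0, 1, 0, 0],   # near-copy of the first assignment
    [0, 0, 1, 0, 1, 1],
    [0, 1, 1, 0, 1, 0],
], dtype=bool)

distances = pdist(assignments, metric="jaccard")   # pairwise Jaccard distances
tree = linkage(distances, method="average")        # average-group (UPGMA) clustering
coph_corr, _ = cophenet(tree, distances)           # how faithfully the tree keeps distances
print("cophenetic correlation:", round(coph_corr, 3))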
İlişkisel Veritabanlarında Denormalizasyon Etkisi: Bir Anket Uygulaması - Denormalization Effects on Relational Databases: A Survey Application. Uzun, E.; Buluş, H. N.; Erdoğan, C.; and Kaya, H. In International Conference on Computer Science and Engineering (UBMK 2016), 2016.
@inproceedings{\n title = {İlişkisel Veritabanlarında Denormalizasyon Etkisi: Bir Anket Uygulaması - Denormalization Effects on Relational Databases: A Survey Application},\n type = {inproceedings},\n year = {2016},\n websites = {https://erdincuzun.com/wp-content/uploads/download/ubmk_69_paper.pdf},\n id = {566bd353-53da-33e5-8b56-ca13e16ed825},\n created = {2018-06-05T12:53:51.483Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2022-05-17T11:17:25.252Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2016},\n folder_uuids = {bc101896-c263-4799-878a-a9fddc1af501},\n private_publication = {false},\n abstract = {In database design, processes that are aimed to speed up reading by adding redundant fields or agglomerating available fields are called denormalization. In this study, the denormalization processes applied to the questionnaire evaluation system, developed for Çorlu Faculty of Engineering, and their advantages are explained. The denormalization process provides a speed up gain of 38 times, along with a storage cost gain of 130 times.},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç and Buluş, H. Nusret and Erdoğan, Cihat and Kaya, Heysem},\n booktitle = {International Conference on Computer Science and Engineering (UBMK 2016)},\n keywords = {Database design,Denormalization,Performance evaluation}\n}
In database design, processes that aim to speed up reading by adding redundant fields or agglomerating available fields are called denormalization. In this study, the denormalization processes applied to the questionnaire evaluation system developed for Çorlu Faculty of Engineering, and their advantages, are explained. The denormalization process provides a speed-up of 38 times, along with a storage cost gain of 130 times.
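A minimal SQLite sketch of the idea (the table and column names are invented and are not the survey system's schema): the normalized design recomputes an aggregate on every read, while the denormalized design stores the redundant value and reads it back directly, paying for it at write time:

# Sketch: normalized read (recomputed aggregate) vs. denormalized read (redundant column).
import sqlite3

db = sqlite3.connect(":memory:")
db.executescript("""
    CREATE TABLE survey (id INTEGER PRIMARY KEY, title TEXT,
                         answer_count INTEGER DEFAULT 0);   -- redundant, denormalized field
    CREATE TABLE answer (id INTEGER PRIMARY KEY, survey_id INTEGER, score INTEGER);
""")
db.execute("INSERT INTO survey (id, title) VALUES (1, 'Course evaluation')")
db.executemany("INSERT INTO answer (survey_id, score) VALUES (1, ?)", [(4,), (5,), (3,)])

# Keep the redundant field in sync at write time (the cost denormalization accepts).
db.execute("UPDATE survey SET answer_count ="
           " (SELECT COUNT(*) FROM answer WHERE survey_id = 1) WHERE id = 1")

# Normalized read: recompute the aggregate on every query.
print(db.execute("SELECT COUNT(*) FROM answer WHERE survey_id = 1").fetchone()[0])
# Denormalized read: a single lookup of the stored value.
print(db.execute("SELECT answer_count FROM survey WHERE id = 1").fetchone()[0])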
2015 (1)
Fiziksel Programlama Platformları Kullanarak Elektrikli Taşıtların Hız, Batarya ve Sıcaklık Verilerinin Takibi [Monitoring the Speed, Battery and Temperature Data of Electric Vehicles Using Physical Programming Platforms]. Doruk, A.; Buluş, H. N.; Moralar, A.; Çelen, S.; and Uzun, E. EJOVOC: Electronic Journal of Vocational Colleges, 5(4): 48-56. 2015.
@article{\n title = {Fiziksel Programlama Platformları Kullanarak Elektrikli Taşıtların Hız, Batarya ve Sıcaklık Verilerinin Takibi},\n type = {article},\n year = {2015},\n keywords = {Arduino,Electrical vehicles,Physical programing environment,Telemetry},\n pages = {48-56},\n volume = {5},\n websites = {http://dergipark.ulakbim.gov.tr/ejovoc/article/view/5000163196/5000147085},\n id = {68bdd162-c7c4-3d5c-8dda-0464ca1cfc91},\n created = {2018-06-05T12:53:51.487Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2018-07-04T13:37:08.816Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2015_Ejovoc},\n private_publication = {false},\n abstract = {Electric and solar vehicles obtain their motion energies via electric engines. The energy used within these engines is obtained using batteries. Electric vehicles’ battery status, temperature and instant velocity data is important for calculating the information of remaining range. In this study, a telemetry system, using physical programing platforms, is developped for measuring and remotely tracing of these data. Data are measured using various sensors, interpreted using physical programing platform and sent to a server via internet and presented over a developed web site.},\n bibtype = {article},\n author = {Doruk, Alpay and Buluş, Halil Nusret and Moralar, Aytaç and Çelen, Soner and Uzun, Erdinç},\n journal = {EJOVOC: Electronic Journal of Vocational Colleges},\n number = {4}\n}
Electric and solar vehicles obtain their motion energy via electric engines, and the energy used by these engines is obtained from batteries. An electric vehicle's battery status, temperature and instant velocity data are important for calculating the remaining range. In this study, a telemetry system using physical programming platforms is developed for measuring and remotely tracking these data. Data are measured using various sensors, interpreted on a physical programming platform, sent to a server via the internet and presented on a web site developed for this purpose.
2014 (1)
An effective and efficient web content extractor for optimizing the crawling process. Uzun, E.; Güner, E. S.; Kiliçaslan, Y.; Yerlikaya, T.; and Agun, H. V. Software - Practice and Experience, 44(10): 1181-1199. 2014.
@article{\n title = {An effective and efficient web content extractor for optimizing the crawling process},\n type = {article},\n year = {2014},\n keywords = {Classification,Intelligent systems,Web content extraction,Web crawling},\n pages = {1181-1199},\n volume = {44},\n id = {b880214e-5d30-31c9-890c-6bfb19f60362},\n created = {2018-03-16T13:30:30.820Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-09-21T08:13:42.173Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2014},\n private_publication = {false},\n abstract = {Classical Web crawlers make use of only hyperlink information in the crawling process. However, focused crawlers are intended to download only Web pages that are relevant to a given topic by utilizing word information before downloading the Web page. But, Web pages contain additional information that can be useful for the crawling process. We have developed a crawler, iCrawler (intelligent crawler), the backbone of which is a Web content extractor that automatically pulls content out of seven different blocks: menus, links, main texts, headlines, summaries, additional necessaries, and unnecessary texts from Web pages. The extraction process consists of two steps, which invoke each other to obtain information from the blocks. The first step learns which HTML tags refer to which blocks using the decision tree learning algorithm. Being guided by numerous sources of information, the crawler becomes considerably effective. It achieved a relatively high accuracy of 96.37% in our experiments of block extraction. In the second step, the crawler extracts content from the blocks using string matching functions. These functions along with the mapping between tags and blocks learned in the first step provide iCrawler with considerable time and storage efficiency. More specifically, iCrawler performs 14 times faster in the second step than in the first step. Furthermore, iCrawler significantly decreases storage costs by 57.10% when compared with the texts obtained through classical HTML stripping.},\n bibtype = {article},\n author = {Uzun, Erdinç and Güner, Edip Serdar and Kiliçaslan, Yilmaz and Yerlikaya, Tarik and Agun, Hayri Volkan},\n doi = {10.1002/spe.2195},\n journal = {Software - Practice and Experience},\n number = {10}\n}
Classical Web crawlers make use of only hyperlink information in the crawling process. However, focused crawlers are intended to download only Web pages that are relevant to a given topic by utilizing word information before downloading the Web page. But, Web pages contain additional information that can be useful for the crawling process. We have developed a crawler, iCrawler (intelligent crawler), the backbone of which is a Web content extractor that automatically pulls content out of seven different blocks: menus, links, main texts, headlines, summaries, additional necessaries, and unnecessary texts from Web pages. The extraction process consists of two steps, which invoke each other to obtain information from the blocks. The first step learns which HTML tags refer to which blocks using the decision tree learning algorithm. Being guided by numerous sources of information, the crawler becomes considerably effective. It achieved a relatively high accuracy of 96.37% in our experiments of block extraction. In the second step, the crawler extracts content from the blocks using string matching functions. These functions along with the mapping between tags and blocks learned in the first step provide iCrawler with considerable time and storage efficiency. More specifically, iCrawler performs 14 times faster in the second step than in the first step. Furthermore, iCrawler significantly decreases storage costs by 57.10% when compared with the texts obtained through classical HTML stripping.
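The first of the two steps, learning which HTML tags hold which block, can be sketched roughly as follows; the feature names and values are invented stand-ins for whatever the crawler actually derives from the DOM:

# Sketch of the learning step: a decision tree maps simple DOM-derived features
# of a tag to a block label. Feature names and values here are illustrative only.
from sklearn.tree import DecisionTreeClassifier, export_text

# [text_length, link_count, depth_in_DOM] for a handful of hypothetical tags
X = [
    [1200,  2, 4],   # long text, few links   -> main text
    [  40, 15, 3],   # short text, many links -> menu
    [  80,  1, 5],   # short text, no links   -> headline
    [ 900,  3, 4],
    [  30, 20, 2],
    [  60,  0, 5],
]
y = ["main", "menu", "headline", "main", "menu", "headline"]

tree = DecisionTreeClassifier(max_depth=3, random_state=0).fit(X, y)
print(export_text(tree, feature_names=["text_length", "link_count", "depth"]))
print(tree.predict([[1000, 1, 4]]))   # -> likely 'main'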
2013 (2)
A hybrid approach for extracting informative content from web pages. Uzun, E.; Agun, H. V.; and Yerlikaya, T. Information Processing and Management, 49(4): 928-944. 2013.
@article{\n title = {A hybrid approach for extracting informative content from web pages},\n type = {article},\n year = {2013},\n keywords = {Template Detection,Web Cleaning,Web Content Extraction,Web Learning Modeling},\n pages = {928-944},\n volume = {49},\n id = {43c947b2-4168-3b3c-b825-ea192abfefc5},\n created = {2018-03-16T13:30:30.710Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2018-07-04T12:00:48.056Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2013},\n folder_uuids = {6d42ffd6-e28f-468b-89db-871815ea39c8,d4ba7016-85a6-45e9-8213-463276800ac8},\n private_publication = {false},\n abstract = {Eliminating noisy information and extracting informative content have become important issues for web mining, search and accessibility. This extraction process can employ automatic techniques and hand-crafted rules. Automatic extraction techniques focus on various machine learning methods, but implementing these techniques increases time complexity of the extraction process. Conversely, extraction through hand-crafted rules is an efficient technique that uses string manipulation functions, but preparing these rules is difficult and cumbersome for users. In this paper, we present a hybrid approach that contains two steps that can invoke each other. The first step discovers informative content using Decision Tree Learning as an appropriate machine learning method and creates rules from the results of this learning method. The second step extracts informative content using rules obtained from the first step. However, if the second step does not return an extraction result, the first step gets invoked. In our experiments, the first step achieves high accuracy with 95.76% in extraction of the informative content. Moreover, 71.92% of the rules can be used in the extraction process, and it is approximately 240 times faster than the first step. © 2013 Elsevier Ltd. All rights reserved.},\n bibtype = {article},\n author = {Uzun, Erdinç and Agun, Hayri Volkan and Yerlikaya, Tarik},\n doi = {10.1016/j.ipm.2013.02.005},\n journal = {Information Processing and Management},\n number = {4}\n}
Eliminating noisy information and extracting informative content have become important issues for web mining, search and accessibility. This extraction process can employ automatic techniques and hand-crafted rules. Automatic extraction techniques focus on various machine learning methods, but implementing these techniques increases time complexity of the extraction process. Conversely, extraction through hand-crafted rules is an efficient technique that uses string manipulation functions, but preparing these rules is difficult and cumbersome for users. In this paper, we present a hybrid approach that contains two steps that can invoke each other. The first step discovers informative content using Decision Tree Learning as an appropriate machine learning method and creates rules from the results of this learning method. The second step extracts informative content using rules obtained from the first step. However, if the second step does not return an extraction result, the first step gets invoked. In our experiments, the first step achieves high accuracy with 95.76% in extraction of the informative content. Moreover, 71.92% of the rules can be used in the extraction process, and it is approximately 240 times faster than the first step. © 2013 Elsevier Ltd. All rights reserved.
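The fast rule-based second step and its fallback to the learning step can be sketched as plain string manipulation; the (start marker, end marker) rule format is an assumption made for illustration, not the paper's exact rule representation:

# Sketch: rule-based extraction with a fallback when the rule no longer matches.
# The (start, end) marker rule format is an illustrative assumption.
from typing import Optional

RULES = {"example-news": ('<div class="article-body">', "</div>")}

def extract_with_rule(html: str, site: str) -> Optional[str]:
    start_marker, end_marker = RULES[site]
    start = html.find(start_marker)
    if start == -1:
        return None                      # rule failed -> caller falls back to learning
    start += len(start_marker)
    end = html.find(end_marker, start)
    return html[start:end] if end != -1 else None

def extract(html: str, site: str) -> str:
    content = extract_with_rule(html, site)
    if content is None:
        # Fallback: rerun the (slow) learning step and refresh RULES[site].
        raise NotImplementedError("invoke the learning step here")
    return content

page = '<html><div class="article-body">Informative content</div></html>'
print(extract(page, "example-news"))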
Author detection by using different term weighting schemes. Tufekci, P.; and Uzun, E. In 2013 21st Signal Processing and Communications Applications Conference (SIU), pages 1-4, April 2013. IEEE.
@inproceedings{\n title = {Author detection by using different term weighting schemes},\n type = {inproceedings},\n year = {2013},\n keywords = {Author detection,NLP,Term weighting schemes,Text classification},\n pages = {1-4},\n websites = {http://ieeexplore.ieee.org/document/6531190/},\n month = {4},\n publisher = {IEEE},\n id = {a7f56d5f-beae-387f-ac49-be768f2e410d},\n created = {2018-06-05T12:53:51.540Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2018-07-04T12:59:46.632Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Tufekci2013},\n private_publication = {false},\n abstract = {In this study, the impact of term weighting on author detection as a type of text classification is investigated. The feature vector being used to represent texts, consists of stem words as features and their weight values, which are obtained by applying 14 different term weighting schemes. The performances of these feature vectors for 3 different datasets in the author detection are tested with some classification methods such as Naïve Bayes Multinominal (NBM), and Support Vector Machine (SVM), Decision Tree (C4.5), and Random Forrest (RF), and are compared with each other. As a result of that, the most successful classifier, which can predict the author of an article, is found as SVM classifier with 98.75% mean accuracy; the most successful term weighting scheme is found as ACTF.IDF.(ICF+1) with 91.54% general mean accuracy.},\n bibtype = {inproceedings},\n author = {Tufekci, Pınar and Uzun, Erdinç},\n doi = {10.1109/SIU.2013.6531190},\n booktitle = {2013 21st Signal Processing and Communications Applications Conference (SIU)}\n}
In this study, the impact of term weighting on author detection, as a type of text classification, is investigated. The feature vector used to represent texts consists of stem words as features and their weight values, which are obtained by applying 14 different term weighting schemes. The performance of these feature vectors on 3 different datasets for author detection is tested with classification methods such as Naïve Bayes Multinomial (NBM), Support Vector Machine (SVM), Decision Tree (C4.5) and Random Forest (RF), and the results are compared with each other. The most successful classifier for predicting the author of an article is the SVM classifier, with 98.75% mean accuracy; the most successful term weighting scheme is ACTF.IDF.(ICF+1), with 91.54% general mean accuracy.
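The general shape of such an experiment can be sketched with scikit-learn; plain TF-IDF stands in for the paper's weighting schemes (the winning ACTF.IDF.(ICF+1) scheme would need a custom transformer), and the four-document corpus is obviously a toy:

# Sketch: term-weighted feature vectors + SVM for author prediction.
# TF-IDF stands in for the paper's weighting schemes; the corpus is a toy example.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

texts = ["economy rates and inflation again", "the match ended with a late goal",
         "markets react to interest rates", "the coach praised the young striker"]
authors = ["columnist_a", "columnist_b", "columnist_a", "columnist_b"]

model = make_pipeline(TfidfVectorizer(), LinearSVC()).fit(texts, authors)
print(model.predict(["inflation and interest rates climb"]))   # -> likely 'columnist_a'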
2012 (3)
A fuzzy ranking approach for improving search results in Turkish as an agglutinative language. Uzun, E. Expert Systems with Applications, 39(5): 5658-5664. 2012.
@article{\n title = {A fuzzy ranking approach for improving search results in Turkish as an agglutinative language},\n type = {article},\n year = {2012},\n keywords = {Agglutinative languages,Fuzzy ranking in searching,Search engine,Term weighting,Turkish IR},\n pages = {5658-5664},\n volume = {39},\n id = {629b9970-8734-3409-865c-65ae07d5cb72},\n created = {2018-03-16T13:30:30.553Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2023-01-15T15:50:32.444Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2012a},\n private_publication = {false},\n abstract = {This study proposes a fuzzy ranking approach, designed for Turkish as an agglutinative language, that focuses on improving stemming techniques via using distances of characters in its search algorithm. Various studies focused on search engines are based on using stemming techniques in indexing process because of the higher percentage of relevancy that these techniques provide. However, stemming techniques may have negative effects on search results in some queries. While analyzing the search results to find the query terms those give irrelevant results and why, we observe that user's query suffixes are crucial in search performance. Therefore, the proposed fuzzy ranking approach supports traditional stemming approaches with the use of suffixes. The search results of this approach are significantly better than stemming techniques in where stemming technique is ineffective. In terms of overall results, the fuzzy ranking approach also gives satisfactory results when compared with stemming techniques such as a Turkish stemmer (19.43% of improvement) and word truncation technique (12.61% of improvement). Moreover, it is statistically better than no stemming with 28.61% of improvement. © 2011 Elsevier Ltd. All rights reserved.},\n bibtype = {article},\n author = {Uzun, Erdinç},\n doi = {10.1016/j.eswa.2011.11.105},\n journal = {Expert Systems with Applications},\n number = {5}\n}
This study proposes a fuzzy ranking approach, designed for Turkish as an agglutinative language, that improves stemming techniques by using distances of characters in its search algorithm. Many studies on search engines rely on stemming techniques in the indexing process because of the higher relevancy these techniques provide. However, stemming techniques may have negative effects on the search results of some queries. While analyzing the search results to find which query terms give irrelevant results and why, we observed that the suffixes of the user's query terms are crucial for search performance. Therefore, the proposed fuzzy ranking approach supports traditional stemming approaches with the use of suffixes. The search results of this approach are significantly better than those of stemming techniques in cases where stemming is ineffective. In terms of overall results, the fuzzy ranking approach also gives satisfactory results compared with stemming techniques such as a Turkish stemmer (19.43% improvement) and word truncation (12.61% improvement). Moreover, it is statistically better than no stemming, with 28.61% improvement. © 2011 Elsevier Ltd. All rights reserved.
Web content extraction by using decision tree learning. Uzun, E.; Agun, H. V.; and Yerlikaya, T. In 2012 20th Signal Processing and Communications Applications Conference (SIU), pages 1-4, 2012.
@inproceedings{\n title = {Web content extraction by using decision tree learning},\n type = {inproceedings},\n year = {2012},\n keywords = {DOM,Decision tree,Web content extraction},\n pages = {1-4},\n websites = {http://ieeexplore.ieee.org/document/6204476/},\n id = {7bcc560c-fc7c-3ff3-b320-0158acf6f924},\n created = {2018-03-16T13:30:30.712Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2018-07-04T12:59:46.813Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2012},\n private_publication = {false},\n abstract = {Via information extraction techniques, web pages are able to generate datasets for various studies such as natural language processing, and data mining. However, nowadays the uninformative sections like advertisement, menus, and links are in increase. The cleaning of web pages from uninformative sections, and extraction of informative content has become an important issue. In this study, we present an decision tree learning approach over DOM based features which aims to clean the uninformative sections and extract informative content in three classes: title, main content, and additional information. Through this approach, differently from previous studies, the learning model for the extraction of the main content constructed on DIV and TD tags. The proposed method achieved 95.58% accuracy in cleaning uninformative sections and extraction of the informative content. Especially for the extraction of the main block, 0.96 f-measure is obtained.},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç and Agun, Hayri Volkan and Yerlikaya, Tarık},\n doi = {10.1109/SIU.2012.6204476},\n booktitle = {2012 20th Signal Processing and Communications Applications Conference (SIU)}\n}
Via information extraction techniques, web pages can be used to generate datasets for various studies such as natural language processing and data mining. However, uninformative sections such as advertisements, menus and links are increasing, so cleaning web pages of uninformative sections and extracting the informative content has become an important issue. In this study, we present a decision tree learning approach over DOM-based features that aims to clean the uninformative sections and extract the informative content in three classes: title, main content and additional information. With this approach, unlike previous studies, the learning model for the extraction of the main content is constructed on DIV and TD tags. The proposed method achieved 95.58% accuracy in cleaning uninformative sections and extracting the informative content. For the extraction of the main block in particular, a 0.96 f-measure is obtained.
Text classification of web based news articles by using Turkish grammatical features. Tüfekci, P.; Uzun, E.; and Sevinç, B. In 2012 20th Signal Processing and Communications Applications Conference (SIU), pages 1-4, April 2012. IEEE.
@inproceedings{\n title = {Text classification of web based news articles by using Turkish grammatical features},\n type = {inproceedings},\n year = {2012},\n keywords = {NLP,Text classification},\n pages = {1-4},\n websites = {http://ieeexplore.ieee.org/document/6204565/},\n month = {4},\n publisher = {IEEE},\n id = {5ff144c4-6c81-3ac9-a60f-93c82d2fb528},\n created = {2018-03-16T13:30:30.813Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2018-07-04T12:59:46.931Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Tufekci2012},\n private_publication = {false},\n abstract = {The dimensions of the feature vectors being used at the classification methods in the literature affect directly the time performance. In this study, how to reduce the dimension of the feature vector by using Turkish's grammar rules without compromising success rates is explained. The feature vector is weighted on the basis of the word frequency as the word stems have been selected as features. During this selection the effects of selection of the word stems with different length and type to the classification are investigated and when the word stems with noun type and the maximum length are selected as features, the success rate has been found to be at the highest level. When this selection is applied with the other methods which reduce the dimension, the dimension of the feature vector is decreased to 97.46%. Using the reduced feature vector the better succes rates generally have been obtained from Naive Bayes, SVM, C 4.5 and RF classification methods and the best performance achieved is 92.73% which has been obtained using the Naive Bayes method.},\n bibtype = {inproceedings},\n author = {Tüfekci, Pinar and Uzun, Erdinç and Sevinç, Burak},\n doi = {10.1109/SIU.2012.6204565},\n booktitle = {2012 20th Signal Processing and Communications Applications Conference (SIU)}\n}
The dimensions of the feature vectors used in the classification methods in the literature directly affect the time performance. In this study, we explain how to reduce the dimension of the feature vector by using Turkish grammar rules without compromising success rates. The feature vector is weighted on the basis of word frequency, with word stems selected as features. The effects of selecting word stems of different lengths and types on the classification are investigated, and the success rate is highest when word stems of noun type and maximum length are selected as features. When this selection is combined with the other dimension reduction methods, the dimension of the feature vector is decreased by 97.46%. Using the reduced feature vector, better success rates are generally obtained from the Naive Bayes, SVM, C4.5 and RF classification methods, and the best performance, 92.73%, is achieved with the Naive Bayes method.
2011 (4)
A lightweight parser for extracting useful contents from web pages. Uzun, E.; Yerlikaya, T.; and Kurt, M. In 2nd International Symposium on Computing in Science & Engineering-ISCSE 2011, Kusadasi, Aydin, Turkey, pages 67-73, 2011.
@inproceedings{\n title = {A lightweight parser for extracting useful contents from web pages},\n type = {inproceedings},\n year = {2011},\n pages = {67-73},\n websites = {https://erdincuzun.com/wp-content/uploads/download/2011/iscse_2011_2.pdf},\n id = {b0720d70-eef9-3df1-be0a-3e645db91218},\n created = {2018-06-05T12:53:51.594Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-03-01T11:26:32.026Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2011c},\n private_publication = {false},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç and Yerlikaya, Tarık and Kurt, Meltem},\n booktitle = {2nd International Symposium on Computing in Science & Engineering-ISCSE 2011, Kusadasi, Aydin, Turkey},\n keywords = {DOM,Parsers,Performance evaluation,Web content extraction}\n}
Analyzing of the Evolution of Web Pages by Using a Domain Based Web Crawler. Uzun, E.; Yerlikaya, T.; and Kurt, M. In Techsys, 26-28 May, Plovdiv, Bulgaria, pages 151-156, 2011.
@inproceedings{\n title = {Analyzing of the Evolution of Web Pages by Using a Domain Based Web Crawler},\n type = {inproceedings},\n year = {2011},\n keywords = {Degree of Changes in Web Pages,Web Crawlers,Web Evolution},\n pages = {151-156},\n websites = {https://erdincuzun.com/wp-content/uploads/download/2011/plovdiv01.pdf},\n id = {0966001a-cb13-3243-a557-ea1c1ee9f9c1},\n created = {2018-06-05T12:53:51.612Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2021-02-21T16:02:44.528Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2011b},\n private_publication = {false},\n abstract = {To improve algorithms that are used in search engines, crawlers and indexers, the evolution of web pages should be examined. For this purpose, we developed a domain based crawler, namely SET Crawler, which collects the web archives between 1998 and 2008 of three Turkish daily popular newspapers (Hurriyet, Milliyet and Sabah). After completion of the crawl, we obtained a set of 3430997 HTML pages. While the average file size of one web page in 1998 approximately is 5.19 KB, this size in 2008 is 53.94 KB. When considering the size of main contents of web pages are similar, this observation shows the degree of increase in the use of unnecessary contents and tags. Analyses indicate that the use of link, image and layout tags has increased significantly in the last decades. Moreover, the tag has been used instead of the},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç and Yerlikaya, Tarık and Kurt, Meltem},\n booktitle = {Techsys, 26-28 May, Plovdiv, Bulgaria}\n}
\n
\n\n\n
\n To improve the algorithms used in search engines, crawlers, and indexers, the evolution of web pages should be examined. For this purpose, we developed a domain-based crawler, the SET Crawler, which collects the 1998-2008 web archives of three popular Turkish daily newspapers (Hurriyet, Milliyet, and Sabah). After completing the crawl, we obtained a set of 3,430,997 HTML pages. While the average file size of a web page in 1998 was approximately 5.19 KB, in 2008 it was 53.94 KB. Considering that the size of the main content of web pages remained similar, this observation shows the degree of increase in the use of unnecessary content and tags. Analyses indicate that the use of link, image, and layout tags has increased significantly over the last decade. Moreover, the tag has been used instead of the\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Examining the Impacts of Stemming Techniques on Turkish Search Results by Using Search Engine for Turkish.\n \n \n \n \n\n\n \n Uzun, E.\n\n\n \n\n\n\n In 2nd International Symposium on Computing in Science & Engineering, pages 33-39, 2011. \n \n\n\n\n
\n
@inproceedings{\n title = {Examining the Impacts of Stemming Techniques on Turkish Search Results by Using Search Engine for Turkish},\n type = {inproceedings},\n year = {2011},\n keywords = {Agglutinative languages,Stemming methods,Turkish IR},\n pages = {33-39},\n websites = {https://erdincuzun.com/wp-content/uploads/download/2011/iscse_2011_1.pdf},\n id = {58c210b0-9b37-3256-84e2-74836ff749d7},\n created = {2018-06-05T12:53:51.676Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-01-16T20:29:39.440Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2011a},\n private_publication = {false},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç},\n booktitle = {2nd International Symposium on Computing in Science & Engineering}\n}
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Reducing Computational Complexity by Restricting the Size of Compared Web Contents.\n \n \n \n \n\n\n \n Uzun, E.; Yerlikaya, T.; and Kurt, M.\n\n\n \n\n\n\n In Techsys, 26-28 May, Plovdiv, Bulgaria, pages 157-160, 2011. \n \n\n\n\n
\n
@inproceedings{\n title = {Reducing Computational Complexity by Restricting the Size of Compared Web Contents},\n type = {inproceedings},\n year = {2011},\n pages = {157-160},\n websites = {https://erdincuzun.com/wp-content/uploads/download/2011/plovdiv02.pdf},\n id = {f8abacd6-9015-3977-a456-0a19263ead7a},\n created = {2018-06-05T12:53:51.707Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-01-16T20:29:39.228Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2011},\n private_publication = {false},\n abstract = {Extracting the relevant contents on web pages is an important issue for researches on information retrieval, data mining and natural language processing. In this issue, contents of tags in same domain web pages can be used to discover unnecessary contents. However, little changes in tag contents of web pages can cause problems in extraction. Therefore, we have adapted levenshtein distance algorithm to overcome these problems. Nevertheless, tag contents that may contain too many characters, have a negative impact on computational complexity. Hence, a solution, which reduces this complexity by comparing only a few characters, is proposed. In experiments, this solution gives a significant improvement (with 84.37%) in the performance of the use of levenshtein distance algorithm to find irrelevant contents.},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç and Yerlikaya, Tarık and Kurt, Meltem},\n booktitle = {Techsys, 26-28 May, Plovdiv, Bulgaria},\n keywords = {Levenshtein Distance Algorithm,Parsing HTML,Reducing Complexity}\n}
\n
\n\n\n
\n Extracting the relevant content of web pages is an important issue for research on information retrieval, data mining, and natural language processing. For this task, the contents of tags in web pages from the same domain can be used to discover unnecessary content. However, small changes in the tag contents of web pages can cause problems in extraction. Therefore, we have adapted the Levenshtein distance algorithm to overcome these problems. Nevertheless, tag contents that contain too many characters have a negative impact on computational complexity. Hence, a solution that reduces this complexity by comparing only a few characters is proposed. In experiments, this solution gives a significant improvement (84.37%) in the performance of using the Levenshtein distance algorithm to find irrelevant content.\n
\n\n\n
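The abstract above describes adapting the Levenshtein distance and restricting the comparison to only a few characters to keep its cost low. The following is a minimal Python sketch of that general idea; the function names, the 40-character prefix, and the 0.9 similarity threshold are illustrative assumptions, not the paper's actual implementation.

def levenshtein(a: str, b: str) -> int:
    """Classic dynamic-programming edit distance."""
    if len(a) < len(b):
        a, b = b, a
    previous = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        current = [i]
        for j, cb in enumerate(b, start=1):
            current.append(min(previous[j] + 1,                 # deletion
                               current[j - 1] + 1,              # insertion
                               previous[j - 1] + (ca != cb)))   # substitution
        previous = current
    return previous[-1]


def similar_tag_content(a: str, b: str, prefix: int = 40, threshold: float = 0.9) -> bool:
    """Compare only the first `prefix` characters of the two tag contents, so the
    O(len(a) * len(b)) cost stays bounded regardless of how large the contents are."""
    a, b = a[:prefix], b[:prefix]
    if not a and not b:
        return True
    distance = levenshtein(a, b)
    return 1 - distance / max(len(a), len(b)) >= threshold

Restricting the comparison to a short prefix trades a little accuracy for a large, predictable reduction in work, which is the kind of speed-up the abstract reports.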
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2010\n \n \n (2)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n Html İçinde Gereksiz Kelimeleri Çıkaran Benzer Metin Tespit Uygulaması.\n \n \n \n \n\n\n \n Uzun, E.\n\n\n \n\n\n\n In Akıllı Sistemlerde Yenilikler ve Uygulamaları Sempozyumu (ASYU-2010), pages 48-52, 2010. \n \n\n\n\n
\n
@inproceedings{\n title = {Html İçinde Gereksiz Kelimeleri Çıkaran Benzer Metin Tespit Uygulaması},\n type = {inproceedings},\n year = {2010},\n keywords = {DOM,Web Cleaning},\n pages = {48-52},\n websites = {https://erdincuzun.com/wp-content/uploads/download/2010/crawler_ver_3_2.pdf},\n id = {a35a8264-d0a2-3cf1-b012-389f69ae4385},\n created = {2018-06-05T12:53:51.772Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-01-16T20:29:39.053Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {false},\n hidden = {false},\n citation_key = {Uzun2010b},\n private_publication = {false},\n abstract = {Kelimelerin metin içinde bulunma sıklığını kullanarak arama yapan arama motorlarından elde edilen sonuçlar, HTML içindeki gereksiz kelimelerden etkilenmektedir. Bu çalışmada, herhangi bir eğitim verisi kullanmadan metinlerdeki benzerliklerini tespit edip gereksiz metinleri çıkaran bir uygulama ve bu uygulamadan elde edilen sonuçlar anlatılmaktadır. Bu uygulama sayesinde HTML dokümana göre %90,59 oranında gereksiz kelime temizlenmiştir. Ayrıca, HTML etiketleri ayrıştırılıp içindeki kelimelere kök bulma işlemi uygulandığında sadece kelimelerin %20,38 oranında kökü bulunurken benzer metin tespit uygulaması sayesinde elde edilen kelimelerin %69,55 oranında kelime kökü tespit edilebilmiştir.},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç},\n booktitle = {Akıllı Sistemlerde Yenilikler ve Uygulamaları Sempozyumu (ASYU-2010)}\n}
\n
\n\n\n
\n The results obtained from search engines that perform searches using the frequency of words in a text are affected by the unnecessary words inside HTML. This study describes an application that detects similarities between texts without using any training data and removes the unnecessary text, together with the results obtained from this application. With this application, 90.59% of the unnecessary words were removed relative to the original HTML document. In addition, when HTML tags are parsed and stemming is applied to the words they contain, a stem could be found for only 20.38% of the words, whereas a stem could be identified for 69.55% of the words obtained through the similar-text detection application.\n
\n\n\n
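The abstract above describes removing unnecessary text by detecting similarities between pages without any training data. A minimal Python sketch of one such training-free idea follows; it simply drops blocks that repeat verbatim across many pages of the same site, which is a cruder criterion than the similarity detection the paper describes, and the function name and the 0.5 ratio are illustrative assumptions.

from collections import Counter

def remove_repeated_blocks(pages: list[list[str]], max_ratio: float = 0.5) -> list[list[str]]:
    """pages: one list of text blocks (e.g. per-tag texts) per page of the same site.
    Blocks that appear on more than `max_ratio` of the pages are treated as
    unnecessary (boilerplate) and removed."""
    counts = Counter(block for page in pages for block in set(page))
    limit = max_ratio * len(pages)
    return [[block for block in page if counts[block] <= limit] for page in pages]

pages = [["MENU", "Story A text"], ["MENU", "Story B text"]]
print(remove_repeated_blocks(pages))   # [['Story A text'], ['Story B text']]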
\n\n\n
\n \n\n \n \n \n \n \n \n İnternet Sayfalarındaki Asıl İçeriği Gösterebilen Akıllı Bir Tarayıcı.\n \n \n \n \n\n\n \n Yerlikaya, T.; and Uzun, E.\n\n\n \n\n\n\n In Akıllı Sistemlerde Yenilikler ve Uygulamaları Sempozyumu (ASYU-2010), pages 53-57, 2010. \n \n\n\n\n
\n
@inproceedings{\n title = {İnternet Sayfalarındaki Asıl İçeriği Gösterebilen Akıllı Bir Tarayıcı},\n type = {inproceedings},\n year = {2010},\n keywords = {DOM,String similarity,Web Cleaning},\n pages = {53-57},\n websites = {https://erdincuzun.com/wp-content/uploads/download/2010/intelbrow_2_2.pdf},\n id = {9837c603-7bfa-3b16-8887-082ad536f2de},\n created = {2018-06-05T12:53:51.941Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-01-16T20:29:39.474Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2010a},\n private_publication = {false},\n abstract = {Kelimelerin metin içinde bulunma sıklığını kullanarak arama yapan arama motorlarından elde edilen sonuçlar, HTML içindeki gereksiz kelimelerden etkilenmektedir. Bu çalışmada, herhangi bir eğitim verisi kullanmadan metinlerdeki benzerliklerini tespit edip gereksiz metinleri çıkaran bir uygulama ve bu uygulamadan elde edilen sonuçlar anlatılmaktadır. Bu uygulama sayesinde HTML dokümana göre %90,59 oranında gereksiz kelime temizlenmiştir. Ayrıca, HTML etiketleri ayrıştırılıp içindeki kelimelere kök bulma işlemi uygulandığında sadece kelimelerin %20,38 oranında kökü bulunurken benzer metin tespit uygulaması sayesinde elde edilen kelimelerin %69,55 oranında kelime kökü tespit edilebilmiştir.},\n bibtype = {inproceedings},\n author = {Yerlikaya, Tarık and Uzun, Erdinç},\n booktitle = {Akıllı Sistemlerde Yenilikler ve Uygulamaları Sempozyumu (ASYU-2010)}\n}
\n
\n\n\n
\n The results obtained from search engines that perform searches using the frequency of words in a text are affected by the unnecessary words inside HTML. This study describes an application that detects similarities between texts without using any training data and removes the unnecessary text, together with the results obtained from this application. With this application, 90.59% of the unnecessary words were removed relative to the original HTML document. In addition, when HTML tags are parsed and stemming is applied to the words they contain, a stem could be found for only 20.38% of the words, whereas a stem could be identified for 69.55% of the words obtained through the similar-text detection application.\n
\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2008\n \n \n (2)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n Web-based acquisition of subcategorization frames for Turkish.\n \n \n \n \n\n\n \n Uzun, E.; Kılıçaslan, Y.; Agun, H., V.; and Uçar, E.\n\n\n \n\n\n\n In The Eighth International Conference on Artificial Intelligence and Soft Computing, Computational Intelligence: Methods and Applications, IEEE Computational Intelligence Society, 2008. \n \n\n\n\n
\n
@inproceedings{\n title = {Web-based acquisition of subcategorization frames for Turkish},\n type = {inproceedings},\n year = {2008},\n keywords = {Agglutinative languages,Subcategorization frames,Web as a corpus},\n websites = {https://erdincuzun.com/wp-content/uploads/download/paper_WebSubCat.pdf},\n id = {4d7e2a40-6c35-3b79-a41f-5287035da31f},\n created = {2018-06-05T12:53:52.020Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-01-16T20:29:39.518Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2008a},\n private_publication = {false},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç and Kılıçaslan, Yılmaz and Agun, Hayri Volkan and Uçar, Erdem},\n booktitle = {The Eighth International Conference on Artificial Intelligence and Soft Computing, Computational Intelligence: Methods and Applications, IEEE Computational Intelligence Society}\n}
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n Html, Xml ve Web Servislerinin İnternet Sunucuları Üzerindeki Etkisinin İncelenmesi.\n \n \n \n \n\n\n \n Uzun, E.; Kılıçaslan, Y.; and Uçar, E.\n\n\n \n\n\n\n Trakya Univ J Sci, 8(2): 81-85. 2008.\n \n\n\n\n
\n
@article{\n title = {Html, Xml ve Web Servislerinin İnternet Sunucuları Üzerindeki Etkisinin İncelenmesi},\n type = {article},\n year = {2008},\n keywords = {Internet Servers,Performance,Web Services,XML},\n pages = {81-85},\n volume = {8},\n websites = {http://dergipark.gov.tr/download/article-file/213799},\n id = {3d117e72-cfae-3c10-be3b-b7da4feaf858},\n created = {2018-06-05T12:53:52.147Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2018-07-04T13:06:53.627Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2008},\n private_publication = {false},\n abstract = {Since the emergence of the concept of WWW, the HTML markup language that encodes visual or non-visual information has become the base of the Internet. Because of the inefficiency of HTML to show data, the XML markup language has begun to take a place in the domain of Internet. With XML, the concept of Web services came to existence. The aim of this study is to examine the effect of HTML, XML and Web Services on Internet servers. When examining this effect, we will make use of some dictionaries available on the Internet and similar dictionary which we developed to use in linguistic applications.},\n bibtype = {article},\n author = {Uzun, Erdinç and Kılıçaslan, Yılmaz and Uçar, Erdem},\n journal = {Trakya Univ J Sci},\n number = {2}\n}
\n
\n\n\n
\n Since the emergence of the WWW, the HTML markup language, which encodes visual and non-visual information, has become the basis of the Internet. Because of HTML's inefficiency at representing data, the XML markup language has begun to take a place on the Internet. With XML, the concept of Web services came into existence. The aim of this study is to examine the effect of HTML, XML, and Web services on Internet servers. When examining this effect, we make use of some dictionaries available on the Internet and a similar dictionary that we developed for use in linguistic applications.\n
\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2007\n \n \n (3)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n Automatic acquisition of subcategorization frames for Turkish with purely statistical methods.\n \n \n \n\n\n \n Uzun, E.; Kılıçaslan, Y.; and Uçar, E.\n\n\n \n\n\n\n In Proceedings of the International Symposium on Innovations in Intelligent Systems and Applications (INISTA), Istanbul, Turkey, pages 11-15, 2007. \n \n\n\n\n
\n
@inproceedings{\n title = {Automatic acquisition of subcategorization frames for Turkish with purely statistical methods},\n type = {inproceedings},\n year = {2007},\n keywords = {Agglutinative languages,Subcategorization frames,Unsupervised methods},\n pages = {11-15},\n id = {0a223518-ff18-322a-8738-cd48d6864029},\n created = {2018-06-05T12:53:52.047Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2018-07-04T13:37:08.845Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2007a},\n private_publication = {false},\n abstract = {This paper presents the results of two experiments conducted by applying the purely statistical methods log likelihood and t-score to Turkish in order to acquire subcategorization frames for this language. The results achieved are compared with some results reported for other languages. The comparison is evaluated in terms of language typology.},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç and Kılıçaslan, Yılmaz and Uçar, Erdem},\n booktitle = {Proceedings of the International Symposium on Innovations in Intelligent Systems and Applications (INISTA), Istanbul, Turkey}\n}
\n
\n\n\n
\n This paper presents the results of two experiments conducted by applying two purely statistical methods, log-likelihood and t-score, to Turkish in order to acquire subcategorization frames for this language. The results achieved are compared with results reported for other languages, and the comparison is evaluated in terms of language typology.\n
\n\n\n
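The abstract above applies the log-likelihood and t-score statistics to verb-frame co-occurrence counts. A minimal Python sketch of the standard formulations of these two association measures follows (a Dunning-style log-likelihood ratio over a 2x2 contingency table and the usual collocation t-score); the paper's exact counting scheme may differ.

from math import log, sqrt

def log_likelihood(k_vf: int, k_v: int, k_f: int, n: int) -> float:
    """Dunning-style log-likelihood ratio (G^2) from a 2x2 contingency table:
    k_vf = verb with frame, k_v = verb total, k_f = frame total, n = all observations."""
    observed = [k_vf, k_v - k_vf, k_f - k_vf, n - k_v - k_f + k_vf]
    expected = [k_v * k_f / n, k_v * (n - k_f) / n,
                (n - k_v) * k_f / n, (n - k_v) * (n - k_f) / n]
    return 2 * sum(o * log(o / e) for o, e in zip(observed, expected) if o > 0)

def t_score(k_vf: int, k_v: int, k_f: int, n: int) -> float:
    """Collocation-style t-score: observed minus expected co-occurrence,
    scaled by the square root of the observed count."""
    expected = k_v * k_f / n
    return (k_vf - expected) / sqrt(k_vf) if k_vf > 0 else 0.0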
\n\n\n
\n \n\n \n \n \n \n \n \n Web based sentence collector.\n \n \n \n \n\n\n \n Uzun, E.; Kılıçaslan, Y.; and Uçar, E.\n\n\n \n\n\n\n In 9th international scientific conference in computer and communication systems and technologies, Smolian, Bulgaria, pages 235-241, 2007. \n \n\n\n\n
\n
@inproceedings{\n title = {Web based sentence collector},\n type = {inproceedings},\n year = {2007},\n keywords = {Search engine,Web as corpus,Web crawling},\n pages = {235-241},\n websites = {https://erdincuzun.com/wp-content/uploads/download/wbse.pdf},\n id = {75a385a0-88d2-3942-aa21-32e01f4706e0},\n created = {2018-06-05T12:53:52.217Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-01-16T20:29:39.217Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2007},\n private_publication = {false},\n abstract = {The World Wide Web can be used as a source of machine-readable text for corpora. Search engines, programs that search documents for specified keywords and return a list of the documents, are the main tools by which such texts can be collected. However, the usefulness of results returned by search engines is limited at least by the sheer amount of noise on the Web. This study describes a Web Based Sentence Collector (WBSC) that uses search engines for retrieving Turkish documents and filters out any detected noise that degenerates the grammaticality of the sentences.},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç and Kılıçaslan, Yılmaz and Uçar, Erdem},\n booktitle = {9th international scientific conference in computer and communication systems and technologies, Smolian, Bulgaria}\n}
\n
\n\n\n
\n The World Wide Web can be used as a source of machine-readable text for corpora. Search engines, programs that search documents for specified keywords and return a list of matching documents, are the main tools by which such texts can be collected. However, the usefulness of the results returned by search engines is limited, not least by the sheer amount of noise on the Web. This study describes a Web Based Sentence Collector (WBSC) that uses search engines to retrieve Turkish documents and filters out any detected noise that degrades the grammaticality of the sentences.\n
\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n İnternet tabanlı bilgi erişimi destekli bir otomatik öğrenme sistemi.\n \n \n \n \n\n\n \n Uzun, E.\n\n\n \n\n\n\n Ph.D. Thesis, 2007.\n \n\n\n\n
\n
@phdthesis{\n title = {İnternet tabanlı bilgi erişimi destekli bir otomatik öğrenme sistemi},\n type = {phdthesis},\n year = {2007},\n keywords = {Automatic acquisition of subcategorization frames,Machine learning methods,Web as a corpus},\n websites = {https://tez.yok.gov.tr/UlusalTezMerkezi/TezGoster?key=XohQ0H2mJnBfxLPsY8dG4yoIRmLa5H7omNhxdplRP1-k5w0pbxn25pSMbkEBjQ9F},\n institution = {Trakya University},\n department = {Computer Engineering Department},\n id = {8ee3b073-ca0f-3ea6-973c-e2aff373df3c},\n created = {2018-06-05T12:53:52.311Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2018-07-04T12:59:46.907Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2010},\n private_publication = {false},\n abstract = {This thesis presents a web-based system that is intended to perform the task of automatic acquisition of subcategorization frames for Turkish. As a pro-drop, a referentially sparse and free word order language, Turkish provides an interesting and challenging domain of application for natural language processing tasks. The thesis aims to contribute to the fields of information retrieval, natural language processing and machine learning in the following respects. Firstly, we offer a web-based approach to the automatic construction of corpora to be used in natural language processing and machine learning work. To this effect, we implemented a tool that collects grammatical Turkish sentences from internet via search engines and annotates them with respect to case marking information. Secondly, various machine learning methods were applied to the generated corpus in order to acquire the subcategorization frames of a set of randomly chosen Turkish verbs. Thirdly, we divided our set of patterns into several subsets of different sizes to understand effect of data size on the performance of methods. Lastly, we offer a comparative evaluation of the methods used in our experiments, focusing particularly on the distinction between supervised and unsupervised methods. The thesis is organized as follows. The first chapter gives a brief account of the concepts of information retrieval, subcategorization frame and machine learning. Moreover, this chapter touches upon the relevant literature and the peculiarities of a Turkish as a language to be investigated from a computational point of view. The second chapter introduces some machine learning algorithms and techniques used in our experiments. In the third chapter, we describe the view of web as a corpus that is the largest data set available for natural language studies. In the fourth chapter, the design and implementation aspects of the proposed system are given. The fifth chapter reports on the results of our experiments and provides a comparative evaluation of the methods used in the experiments along with observations on the effect of data size on the performances. The thesis ends with a summary of major findings and conclusions in chapter six.},\n bibtype = {phdthesis},\n author = {Uzun, Erdinç}\n}
\n
\n\n\n
\n This thesis presents a web-based system that is intended to perform the task of automatic acquisition of subcategorization frames for Turkish. As a pro-drop, referentially sparse, free word order language, Turkish provides an interesting and challenging domain of application for natural language processing tasks. The thesis aims to contribute to the fields of information retrieval, natural language processing, and machine learning in the following respects. Firstly, we offer a web-based approach to the automatic construction of corpora to be used in natural language processing and machine learning work. To this effect, we implemented a tool that collects grammatical Turkish sentences from the Internet via search engines and annotates them with case marking information. Secondly, various machine learning methods were applied to the generated corpus in order to acquire the subcategorization frames of a set of randomly chosen Turkish verbs. Thirdly, we divided our set of patterns into several subsets of different sizes to understand the effect of data size on the performance of the methods. Lastly, we offer a comparative evaluation of the methods used in our experiments, focusing particularly on the distinction between supervised and unsupervised methods. The thesis is organized as follows. The first chapter gives a brief account of the concepts of information retrieval, subcategorization frames, and machine learning; it also touches upon the relevant literature and the peculiarities of Turkish as a language to be investigated from a computational point of view. The second chapter introduces the machine learning algorithms and techniques used in our experiments. The third chapter describes the view of the web as a corpus, the largest data set available for natural language studies. The fourth chapter presents the design and implementation of the proposed system. The fifth chapter reports the results of our experiments and provides a comparative evaluation of the methods used, along with observations on the effect of data size on performance. The thesis ends with a summary of the major findings and conclusions in chapter six.\n
\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2006\n \n \n (1)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n XML’in Zaman ve Yer Etkinliği Açısından İncelenmesi.\n \n \n \n \n\n\n \n Uzun, E.; Kılıçaslan, Y.; and Uçar, E.\n\n\n \n\n\n\n In Bilgi Teknolojileri Kongresi IV, Akademik Bilişim 2006, 09-11 şubat 2006, Denizli, Pamukkale Üniversitesi, pages 509-511, 2006. \n \n\n\n\n
\n
@inproceedings{\n title = {XML’in Zaman ve Yer Etkinliği Açısından İncelenmesi},\n type = {inproceedings},\n year = {2006},\n keywords = {JSON,Performance evaluation,SMEL,XML},\n pages = {509-511},\n websites = {https://erdincuzun.com/wp-content/uploads/download/ab_141.pdf},\n id = {7b3512a9-440c-3c08-92ae-204f347747a8},\n created = {2018-06-05T12:53:52.385Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-01-16T20:29:39.050Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {false},\n hidden = {false},\n citation_key = {Uzun2006},\n private_publication = {false},\n abstract = {XML is a widely spreading markup language designed for the web base of tomorrow. Even though Xml is an efficient language in terms of readibility and describility, it has ostacles on space efficiency because of the redundant use of tags. In this studythe cost of space effiency problem and its probable solutions are discussed.},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç and Kılıçaslan, Yılmaz and Uçar, Erdem},\n booktitle = {Bilgi Teknolojileri Kongresi IV, Akademik Bilişim 2006, 09-11 şubat 2006, Denizli, Pamukkale Üniversitesi}\n}
\n
\n\n\n
\n XML is a rapidly spreading markup language designed for the web of tomorrow. Even though XML is an efficient language in terms of readability and descriptiveness, it has obstacles regarding space efficiency because of its redundant use of tags. In this study, the cost of the space efficiency problem and its possible solutions are discussed.\n
\n\n\n
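The abstract above argues that XML's redundant opening and closing tags hurt space efficiency, and the entry's keywords point to more compact notations such as JSON. A minimal Python sketch comparing the encoded size of the same record in both notations; the record itself is an invented example, not data from the paper.

# The same invented record serialized as XML and as JSON, to illustrate the
# overhead of repeating every field name in a closing tag.
xml_record = "<person><name>Ayse</name><city>Edirne</city></person>"
json_record = '{"name": "Ayse", "city": "Edirne"}'

print(len(xml_record), "characters as XML")    # every field name appears twice
print(len(json_record), "characters as JSON")  # every field name appears once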
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2004\n \n \n (2)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n A New Approach to Developing A Data Exchange Language as An Alternative to XML.\n \n \n \n \n\n\n \n Uzun, E.; Kılıçaslan, Y.; and Uçar, E.\n\n\n \n\n\n\n In International Scientific Conference’2004 (UNITECH’04), Gabrovo, Bulgaria, November 18-19, pages Vol. I, 318-321, 2004. \n \n\n\n\n
\n
@inproceedings{\n title = {A New Approach to Developing A Data Exchange Language as An Alternative to XML},\n type = {inproceedings},\n year = {2004},\n keywords = {JSON,Performance evaluation,SMEL,XML},\n pages = {Vol. I, 318-321},\n websites = {https://erdincuzun.com/wp-content/uploads/download/gabrova.pdf},\n id = {9277fcb4-f0a5-3f39-a740-5cd3caf5783d},\n created = {2018-06-05T12:53:52.382Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-01-16T20:29:39.333Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {false},\n hidden = {false},\n citation_key = {Uzun2004},\n private_publication = {false},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç and Kılıçaslan, Yılmaz and Uçar, Erdem},\n booktitle = {International Scientific Conference’2004 (UNITECH’04), Gabrovo, Bulgaria, November 18-19}\n}
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n E-Belediyecilik ve Bir Kent Bilgi Sistemi Uygulaması.\n \n \n \n \n\n\n \n Uçar, E.; Uzun, E.; and Uçar, Ö.\n\n\n \n\n\n\n Pamukkale Üniversitesi Mühendislik Bilimleri Dergisi, 10(4): 33-35. 2004.\n \n\n\n\n
\n
@article{\n title = {E-Belediyecilik ve Bir Kent Bilgi Sistemi Uygulaması},\n type = {article},\n year = {2004},\n pages = {33-35},\n volume = {10},\n websites = {http://dergipark.ulakbim.gov.tr/pajes/article/view/5000089702/5000083336},\n id = {0376b3c6-ff01-3a19-a4a1-afc41f12e53d},\n created = {2018-06-05T12:53:52.459Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2018-07-04T12:00:48.148Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Ucar2004},\n private_publication = {false},\n bibtype = {article},\n author = {Uçar, Erdem and Uzun, Erdinç and Uçar, Özlem},\n journal = {Pamukkale Üniversitesi Mühendislik Bilimleri Dergisi},\n number = {4}\n}
\n
\n\n\n\n
\n\n\n\n\n\n
\n
\n\n
\n
\n  \n 2003\n \n \n (3)\n \n \n
\n
\n \n \n
\n \n\n \n \n \n \n \n \n ISAPI Uygulamalarında, Sunucu Sistemlerde Oturumlar Oluşturarak Kullanıcı Erişim Kontrolü ve Bir Uygulaması.\n \n \n \n \n\n\n \n Uçar, E.; Uzun, E.; and Karslı, V.\n\n\n \n\n\n\n In Pamukkale Üniversitesi, II. Bilgi Teknolojileri Kongresi, Bilgitek 2003, 01-04 Mayıs 2003, Denizli, pages 79-81, 2003. \n \n\n\n\n
\n
@inproceedings{\n title = {ISAPI Uygulamalarında, Sunucu Sistemlerde Oturumlar Oluşturarak Kullanıcı Erişim Kontrolü ve Bir Uygulaması},\n type = {inproceedings},\n year = {2003},\n pages = {79-81},\n websites = {https://erdincuzun.com/wp-content/uploads/download/isapi.pdf},\n id = {49497186-5be7-39be-a2aa-700f8e7dd7af},\n created = {2018-06-05T12:53:52.528Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-01-16T20:29:39.372Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Ucar2003b},\n private_publication = {false},\n bibtype = {inproceedings},\n author = {Uçar, Erdem and Uzun, Erdinç and Karslı, Veysel},\n booktitle = {Pamukkale Üniversitesi, II. Bilgi Teknolojileri Kongresi, Bilgitek 2003, 01-04 Mayıs 2003, Denizli}\n}
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n \n The Development Of Web Based Applications Creating User Sessions With Isapi.\n \n \n \n \n\n\n \n Uçar, E.; Uzun, E.; and Karslı, V.\n\n\n \n\n\n\n In International XII. Turkish Symposium on Artificial Intelligence and Neural Networks, Çanakkale, Temmuz, pages 306-307, 2003. \n \n\n\n\n
\n
@inproceedings{\n title = {The Development Of Web Based Applications Creating User Sessions With Isapi},\n type = {inproceedings},\n year = {2003},\n pages = {306-307},\n websites = {https://erdincuzun.com/wp-content/uploads/download/isapi_tainn.pdf},\n id = {ae02a6c2-a1f6-3657-ac19-79249cf3c67f},\n created = {2018-06-05T12:53:52.533Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-01-16T20:29:39.487Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Ucar2003a},\n private_publication = {false},\n bibtype = {inproceedings},\n author = {Uçar, Erdem and Uzun, Erdinç and Karslı, Veysel},\n booktitle = {International XII. Turkish Symposium on Artificial Intelligence and Neural Networks, Çanakkale, Temmuz}\n}
\n
\n\n\n\n
\n\n\n
\n \n\n \n \n \n \n \n E-Belediyecilik ve Bir Kent Bilgi Sistemi Uygulaması.\n \n \n \n\n\n \n Uçar, E.; Uzun, E.; and Uçar, Ö.\n\n\n \n\n\n\n In Pamukkale Üniversitesi, II. Bilgi Teknolojileri Kongresi, Bilgitek 2003, 01-04 Mayıs 2003, Denizli, pages 76-78, 2003. \n \n\n\n\n
\n
@inproceedings{\n title = {E-Belediyecilik ve Bir Kent Bilgi Sistemi Uygulaması},\n type = {inproceedings},\n year = {2003},\n pages = {76-78},\n id = {0b267364-8bb2-3c1b-bfdd-759cf208edd7},\n created = {2018-07-03T11:58:15.033Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2018-07-04T12:00:48.341Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Ucar2003},\n private_publication = {false},\n bibtype = {inproceedings},\n author = {Uçar, Erdem and Uzun, Erdinç and Uçar, Özlem},\n booktitle = {Pamukkale Üniversitesi, II. Bilgi Teknolojileri Kongresi, Bilgitek 2003, 01-04 Mayıs 2003, Denizli}\n}
\n
\n\n\n\n
\n\n\n\n\n\n
\n
\n\n\n\n\n
\n\n\n \n\n \n \n \n \n\n
\n"}; document.write(bibbase_data.data);