Web based sentence collector. Uzun, E., Kılıçaslan, Y., & Uçar, E. In 9th international scientific conference in computer and communication systems and technologies, Smolian, Bulgaria, pages 235–241, 2007.
Links: Website, abstract, bibtex.
Abstract: The World Wide Web can be used as a source of machine-readable text for corpora. Search engines, programs that search documents for specified keywords and return a list of the documents, are the main tools by which such texts can be collected. However, the usefulness of results returned by search engines is limited at least by the sheer amount of noise on the Web. This study describes a Web Based Sentence Collector (WBSC) that uses search engines for retrieving Turkish documents and filters out any detected noise that degenerates the grammaticality of the sentences.
@comment{
  Conference paper: "Web based sentence collector" (2007).
  The entry key is taken from the entry's own citation_key field.
  The url field duplicates the non-standard websites field so that
  url-aware styles can print the link. Fields such as id, created,
  profile_id, last_modified, read, starred, authored, confirmed, hidden,
  file_attached, private_publication and bibtype are reference-manager
  metadata; they are unknown to standard styles and are kept only for
  round-tripping. Author names are stored as UTF-8.
}
@inproceedings{Uzun2007,
  author              = {Uzun, Erdinç and Kılıçaslan, Yılmaz and Uçar, Erdem},
  title               = {Web based sentence collector},
  booktitle           = {9th international scientific conference in computer and communication systems and technologies, Smolian, Bulgaria},
  year                = {2007},
  pages               = {235--241},
  url                 = {https://erdincuzun.com/wp-content/uploads/download/wbse.pdf},
  keywords            = {Search engine,Web as corpus,Web crawling},
  abstract            = {The World Wide Web can be used as a source of machine-readable text for corpora. Search engines, programs that search documents for specified keywords and return a list of the documents, are the main tools by which such texts can be collected. However, the usefulness of results returned by search engines is limited at least by the sheer amount of noise on the Web. This study describes a Web Based Sentence Collector (WBSC) that uses search engines for retrieving Turkish documents and filters out any detected noise that degenerates the grammaticality of the sentences.},
  type                = {inproceedings},
  websites            = {https://erdincuzun.com/wp-content/uploads/download/wbse.pdf},
  id                  = {75a385a0-88d2-3942-aa21-32e01f4706e0},
  created             = {2018-06-05T12:53:52.217Z},
  file_attached       = {false},
  profile_id          = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},
  last_modified       = {2020-01-16T20:29:39.217Z},
  read                = {false},
  starred             = {false},
  authored            = {true},
  confirmed           = {true},
  hidden              = {false},
  citation_key        = {Uzun2007},
  private_publication = {false},
  bibtype             = {inproceedings},
}
Downloads: 0
{"_id":"RS9vb98ymGsPGnDkX","bibbaseid":"uzun-klaslan-uar-webbasedsentencecollector-2007","downloads":0,"creationDate":"2018-07-03T12:59:41.827Z","title":"Web based sentence collector","author_short":["Uzun, E.","Kılıçaslan, Y.","Uçar, E."],"year":2007,"bibtype":"inproceedings","biburl":"https://bibbase.org/service/mendeley/37fa15c3-e5d0-3212-8e18-e4c72814fd47","bibdata":{"title":"Web based sentence collector","type":"inproceedings","year":"2007","keywords":"Search engine,Web as corpus,Web crawling","pages":"235-241","websites":"https://erdincuzun.com/wp-content/uploads/download/wbse.pdf","id":"75a385a0-88d2-3942-aa21-32e01f4706e0","created":"2018-06-05T12:53:52.217Z","file_attached":false,"profile_id":"37fa15c3-e5d0-3212-8e18-e4c72814fd47","last_modified":"2020-01-16T20:29:39.217Z","read":false,"starred":false,"authored":"true","confirmed":"true","hidden":false,"citation_key":"Uzun2007","private_publication":false,"abstract":"The World Wide Web can be used as a source of machine-readable text for corpora. Search engines, programs that search documents for specified keywords and return a list of the documents, are the main tools by which such texts can be collected. However, the usefulness of results returned by search engines is limited at least by the sheer amount of noise on the Web. 
This study describes a Web Based Sentence Collector (WBSC) that uses search engines for retrieving Turkish documents and filters out any detected noise that degenerates the grammaticality of the sentences.","bibtype":"inproceedings","author":"Uzun, Erdinç and Kılıçaslan, Yılmaz and Uçar, Erdem","booktitle":"9th international scientific conference in computer and communication systems and technologies, Smolian, Bulgaria","bibtex":"@inproceedings{\n title = {Web based sentence collector},\n type = {inproceedings},\n year = {2007},\n keywords = {Search engine,Web as corpus,Web crawling},\n pages = {235-241},\n websites = {https://erdincuzun.com/wp-content/uploads/download/wbse.pdf},\n id = {75a385a0-88d2-3942-aa21-32e01f4706e0},\n created = {2018-06-05T12:53:52.217Z},\n file_attached = {false},\n profile_id = {37fa15c3-e5d0-3212-8e18-e4c72814fd47},\n last_modified = {2020-01-16T20:29:39.217Z},\n read = {false},\n starred = {false},\n authored = {true},\n confirmed = {true},\n hidden = {false},\n citation_key = {Uzun2007},\n private_publication = {false},\n abstract = {The World Wide Web can be used as a source of machine-readable text for corpora. Search engines, programs that search documents for specified keywords and return a list of the documents, are the main tools by which such texts can be collected. However, the usefulness of results returned by search engines is limited at least by the sheer amount of noise on the Web. 
This study describes a Web Based Sentence Collector (WBSC) that uses search engines for retrieving Turkish documents and filters out any detected noise that degenerates the grammaticality of the sentences.},\n bibtype = {inproceedings},\n author = {Uzun, Erdinç and Kılıçaslan, Yılmaz and Uçar, Erdem},\n booktitle = {9th international scientific conference in computer and communication systems and technologies, Smolian, Bulgaria}\n}","author_short":["Uzun, E.","Kılıçaslan, Y.","Uçar, E."],"urls":{"Website":"https://erdincuzun.com/wp-content/uploads/download/wbse.pdf"},"biburl":"https://bibbase.org/service/mendeley/37fa15c3-e5d0-3212-8e18-e4c72814fd47","bibbaseid":"uzun-klaslan-uar-webbasedsentencecollector-2007","role":"author","keyword":["Search engine","Web as corpus","Web crawling"],"metadata":{"authorlinks":{"uzun, e":"https://erdincuzun.com/yayinlar/"}},"downloads":0},"search_terms":["web","based","sentence","collector","uzun","kılıçaslan","uçar"],"keywords":["search engine","web as corpus","web crawling"],"authorIDs":["QrE2Jk7Eehmqc5trT"],"dataSources":["mqdHLrE2gnaRYnL6B","ya2CyA73rpZseyrZ8"]}