More Effective Boilerplate Removal - the GoldMiner Algorithm. Endrédy, I. & Novák, A. Polibits - Research journal on Computer science and computer engineering with applications, 2013.
More Effective Boilerplate Removal - the GoldMiner Algorithm [link]Paper  bibtex   
@comment{Journal article in Polibits no. 48 (2013). In the title only the
camelCase name GoldMiner is brace-protected, so bibliography styles remain
free to apply their own title casing to the other words.}
@article{endredy_more_2013,
	author   = {Endrédy, István and Novák, Attila},
	title    = {More Effective Boilerplate Removal -- the {GoldMiner} Algorithm},
	journal  = {Polibits - Research journal on Computer science and computer engineering with applications},
	year     = {2013},
	number   = {48},
	pages    = {79--83},
	issn     = {1870-9044},
	url      = {http://polibits.gelbukh.com/2013_48},
	language = {eng},
	keywords = {Corpus building, boilerplate removal, the web as corpus},
}

Downloads: 0