On the cost-effectiveness of neural and non-neural approaches and representations for text classification: A comprehensive comparative study. Cunha, W., Mangaravite, V., Gomes, C., Canuto, S., Resende, E., Nascimento, C., Viegas, F., França, C., Martins, W. S., Almeida, J. M., Rosa, T., Rocha, L., & Gonçalves, M. A. Information Processing and Management, 58(3):102481, 2021. Publisher: Elsevier Ltd
This article makes two major contributions. First, we present the results of a critical analysis of recent scientific articles on neural and non-neural approaches and representations for automatic text classification (ATC). This analysis focuses on assessing the scientific rigor of such studies. It reveals a profusion of potential issues related to the experimental procedures, including: (i) use of inadequate experimental protocols, including no repetitions for the sake of assessing variability and generalization; (ii) lack of statistical treatment of the results; (iii) lack of details on hyperparameter tuning, especially for the baselines; and (iv) use of inadequate measures of classification effectiveness (e.g., accuracy with skewed distributions). Second, we provide some organization and grounding to the field by performing a comprehensive and scientifically sound comparison of recent neural and non-neural ATC solutions. Our study provides a more complete picture by looking beyond classification effectiveness, taking into account the trade-off between effectiveness and model cost (i.e., training time). Our evaluation is guided by scientific rigor, which, as our literature review shows, is missing in a large body of work. Our experimental results, based on more than 1500 measurements, reveal that on the smaller datasets, the simplest and cheapest non-neural methods are among the best performers. On the larger datasets, neural Transformers perform better in terms of classification effectiveness. However, when compared to the best (properly tuned) non-neural solutions, the gains in effectiveness are not very substantial, especially considering the much longer training times (up to 23x slower). Our findings call for self-reflection on best practices in the field, from the way experiments are conducted and analyzed to the choice of proper baselines for each situation and scenario.
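
An editorial aside, not from the paper itself: point (iv) above, the inadequacy of accuracy under skewed class distributions, is easy to demonstrate. The following minimal Python sketch (assuming scikit-learn is available; the 95/5 toy label split is hypothetical) shows a degenerate majority-class predictor reaching 95% accuracy while macro-averaged F1 exposes its failure on the minority class.

    # Toy illustration of issue (iv): accuracy vs. macro-F1 on skewed labels.
    from sklearn.metrics import accuracy_score, f1_score

    # Hypothetical skewed ground truth: 95 negatives, 5 positives.
    y_true = [0] * 95 + [1] * 5
    # A degenerate classifier that always predicts the majority class.
    y_pred = [0] * 100

    print("accuracy:", accuracy_score(y_true, y_pred))  # 0.95 -- looks strong
    print("macro-F1:", f1_score(y_true, y_pred, average="macro", zero_division=0))  # ~0.49 -- reveals the failure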
@article{cunha_cost-effectiveness_2021,
	title = {On the cost-effectiveness of neural and non-neural approaches and representations for text classification: {A} comprehensive comparative study},
	volume = {58},
	issn = {0306-4573},
	url = {https://doi.org/10.1016/j.ipm.2020.102481},
	doi = {10.1016/j.ipm.2020.102481},
	abstract = {This article makes two major contributions. First, we present the results of a critical analysis of recent scientific articles on neural and non-neural approaches and representations for automatic text classification (ATC). This analysis focuses on assessing the scientific rigor of such studies. It reveals a profusion of potential issues related to the experimental procedures, including: (i) use of inadequate experimental protocols, including no repetitions for the sake of assessing variability and generalization; (ii) lack of statistical treatment of the results; (iii) lack of details on hyperparameter tuning, especially for the baselines; and (iv) use of inadequate measures of classification effectiveness (e.g., accuracy with skewed distributions). Second, we provide some organization and grounding to the field by performing a comprehensive and scientifically sound comparison of recent neural and non-neural ATC solutions. Our study provides a more complete picture by looking beyond classification effectiveness, taking into account the trade-off between effectiveness and model cost (i.e., training time). Our evaluation is guided by scientific rigor, which, as our literature review shows, is missing in a large body of work. Our experimental results, based on more than 1500 measurements, reveal that on the smaller datasets, the simplest and cheapest non-neural methods are among the best performers. On the larger datasets, neural Transformers perform better in terms of classification effectiveness. However, when compared to the best (properly tuned) non-neural solutions, the gains in effectiveness are not very substantial, especially considering the much longer training times (up to 23x slower). Our findings call for self-reflection on best practices in the field, from the way experiments are conducted and analyzed to the choice of proper baselines for each situation and scenario.},
	number = {3},
	journal = {Information Processing and Management},
	author = {Cunha, Washington and Mangaravite, Vítor and Gomes, Christian and Canuto, Sérgio and Resende, Elaine and Nascimento, Cecilia and Viegas, Felipe and França, Celso and Martins, Wellington Santos and Almeida, Jussara M. and Rosa, Thierson and Rocha, Leonardo and Gonçalves, Marcos André},
	year = {2021},
	note = {Publisher: Elsevier Ltd},
	keywords = {Comparative study, Systematic review, Text classification},
	pages = {102481},
}
