Building a Question-Answering Corpus Using Social Media and News Articles

Building a Question-Answering Corpus Using Social Media and News Articles. Cavalin, P., Figueiredo, F., de Bayser, M., Moyano, L., Candello, H., Appel, A., & Souza, R. In International Conference on Computational Processing of the Portuguese Language, pages 353–358, 2016. Springer.

Paper abstract bibtex

Is it possible to develop a reliable QA-Corpus using social media data? What are the challenges faced when attempting such a task? In this paper, we discuss these questions and present our findings when developing a QA-Corpus on the topic of Brazilian finance. In order to populate our corpus, we relied on opinions from experts on Brazilian finance that are active on the Twitter application. From these experts, we extracted information from news websites that are used as answers in the corpus. Moreover, to effectively provide rankings of answers to questions, we employ novel word vector based similarity measures between short sentences (that accounts for both questions and Tweets). We validated our methods on a recently released dataset of similarity between short Portuguese sentences. Finally, we also discuss the effectiveness of our approach when used to rank answers to questions from real users.

@comment{
	cavalin_building_2016: conference paper, biblatex field names (date, urldate, venue).
	The conference site is stored in venue; biblatex reserves location for the
	publisher's place of publication, which this record does not state.
}
@inproceedings{cavalin_building_2016,
	author = {Cavalin, Paulo and Figueiredo, Flavio and de Bayser, Maíra and Moyano, Luis and Candello, Heloisa and Appel, Ana and Souza, Renan},
	title = {Building a Question-Answering Corpus Using Social Media and News Articles},
	booktitle = {International Conference on Computational Processing of the Portuguese Language},
	venue = {Tomar, Portugal},
	publisher = {Springer},
	pages = {353--358},
	date = {2016},
	url = {https://renan-souza.github.io/files/PROPOR16.pdf},
	urldate = {2016-10-17},
	abstract = {Is it possible to develop a reliable {QA}-Corpus using social media data? What are the challenges faced when attempting such a task? In this paper, we discuss these questions and present our findings when developing a {QA}-Corpus on the topic of Brazilian finance. In order to populate our corpus, we relied on opinions from experts on Brazilian finance that are active on the Twitter application. From these experts, we extracted information from news websites that are used as answers in the corpus. Moreover, to effectively provide rankings of answers to questions, we employ novel word vector based similarity measures between short sentences (that accounts for both questions and Tweets). We validated our methods on a recently released dataset of similarity between short Portuguese sentences. Finally, we also discuss the effectiveness of our approach when used to rank answers to questions from real users.},
	file = {[PDF] from github.io:/Users/rfsouza/Library/Application Support/Zotero/Profiles/33j3px8o.default/zotero/storage/TSNUPRQA/Cavalin et al. - 2016 - Building a Question-Answering Corpus Using Social .pdf:application/pdf},
}

Downloads: 0

{"_id":"BvR83moyA2TQ4pp5o","bibbaseid":"cavalin-figueiredo-debayser-moyano-candello-appel-souza-buildingaquestionansweringcorpususingsocialmediaandnewsarticles","downloads":0,"creationDate":"2016-10-18T02:58:27.164Z","title":"Building a Question-Answering Corpus Using Social Media and News Articles","author_short":["Cavalin, P.","Figueiredo, F.","de Bayser, M.","Moyano, L.","Candello, H.","Appel, A.","Souza, R."],"year":null,"bibtype":"inproceedings","biburl":"http://raw.githubusercontent.com/renan-souza/renan-souza.github.io/master/Renans-Publications.bib","bibdata":{"bibtype":"inproceedings","type":"inproceedings","location":"Tomar, Portugal","title":"Building a Question-Answering Corpus Using Social Media and News Articles","url":"https://renan-souza.github.io/files/PROPOR16.pdf","abstract":"Is it possible to develop a reliable QA-Corpus using social media data? What are the challenges faced when attempting such a task? In this paper, we discuss these questions and present our findings when developing a QA-Corpus on the topic of Brazilian finance. In order to populate our corpus, we relied on opinions from experts on Brazilian finance that are active on the Twitter application. From these experts, we extracted information from news websites that are used as answers in the corpus. Moreover, to effectively provide rankings of answers to questions, we employ novel word vector based similarity measures between short sentences (that accounts for both questions and Tweets). We validated our methods on a recently released dataset of similarity between short Portuguese sentences. 
Finally, we also discuss the effectiveness of our approach when used to rank answers to questions from real users.","pages":"353–358","booktitle":"International Conference on Computational Processing of the Portuguese Language","publisher":"Springer","author":[{"propositions":[],"lastnames":["Cavalin"],"firstnames":["Paulo"],"suffixes":[]},{"propositions":[],"lastnames":["Figueiredo"],"firstnames":["Flavio"],"suffixes":[]},{"propositions":["de"],"lastnames":["Bayser"],"firstnames":["Maíra"],"suffixes":[]},{"propositions":[],"lastnames":["Moyano"],"firstnames":["Luis"],"suffixes":[]},{"propositions":[],"lastnames":["Candello"],"firstnames":["Heloisa"],"suffixes":[]},{"propositions":[],"lastnames":["Appel"],"firstnames":["Ana"],"suffixes":[]},{"propositions":[],"lastnames":["Souza"],"firstnames":["Renan"],"suffixes":[]}],"urldate":"2016-10-17","date":"2016","file":"[PDF] from github.io:/Users/rfsouza/Library/Application Support/Zotero/Profiles/33j3px8o.default/zotero/storage/TSNUPRQA/Cavalin et al. - 2016 - Building a Question-Answering Corpus Using Social .pdf:application/pdf","bibtex":"@inproceedings{cavalin_building_2016,\n\tlocation = {Tomar, Portugal},\n\ttitle = {Building a Question-Answering Corpus Using Social Media and News Articles},\n\turl = {https://renan-souza.github.io/files/PROPOR16.pdf},\n\tabstract = {Is it possible to develop a reliable {QA}-Corpus using social media data? What are the challenges faced when attempting such a task? In this paper, we discuss these questions and present our findings when developing a {QA}-Corpus on the topic of Brazilian finance. In order to populate our corpus, we relied on opinions from experts on Brazilian finance that are active on the Twitter application. From these experts, we extracted information from news websites that are used as answers in the corpus. 
Moreover, to effectively provide rankings of answers to questions, we employ novel word vector based similarity measures between short sentences (that accounts for both questions and Tweets). We validated our methods on a recently released dataset of similarity between short Portuguese sentences. Finally, we also discuss the effectiveness of our approach when used to rank answers to questions from real users.},\n\tpages = {353--358},\n\tbooktitle = {International Conference on Computational Processing of the Portuguese Language},\n\tpublisher = {Springer},\n\tauthor = {Cavalin, Paulo and Figueiredo, Flavio and de Bayser, Maíra and Moyano, Luis and Candello, Heloisa and Appel, Ana and Souza, Renan},\n\turldate = {2016-10-17},\n\tdate = {2016},\n\tfile = {[PDF] from github.io:/Users/rfsouza/Library/Application Support/Zotero/Profiles/33j3px8o.default/zotero/storage/TSNUPRQA/Cavalin et al. - 2016 - Building a Question-Answering Corpus Using Social .pdf:application/pdf}\n}\n\n","author_short":["Cavalin, P.","Figueiredo, F.","de Bayser, M.","Moyano, L.","Candello, H.","Appel, A.","Souza, R."],"key":"cavalin_building_2016","id":"cavalin_building_2016","bibbaseid":"cavalin-figueiredo-debayser-moyano-candello-appel-souza-buildingaquestionansweringcorpususingsocialmediaandnewsarticles","role":"author","urls":{"Paper":"https://renan-souza.github.io/files/PROPOR16.pdf"},"downloads":0},"search_terms":["building","question","answering","corpus","using","social","media","news","articles","cavalin","figueiredo","de bayser","moyano","candello","appel","souza"],"keywords":[],"authorIDs":[],"dataSources":["NetmjR9Zk4dDJod22"]}