Enhancing Semantic Role Labeling for Tweets Using Self-Training. Liu, X., Li, K., Zhou, M., & Xiong, Z. Entropy, 2009.
Enhancing Semantic Role Labeling for Tweets Using Self-Training [link]Website  abstract   bibtex   
Semantic Role Labeling (SRL) for tweets is a meaningful task that can benefit a wide range of applications such as fine-grained information extraction and retrieval from tweets. One main challenge of the task is the lack of annotated tweets, which are required to train a statistical model. We introduce self-training to SRL, leveraging abundant unlabeled tweets to alleviate its dependence on annotated tweets. A novel strategy of tweet selection is presented, ensuring the chosen tweets are both correct and informative. More specifically, the correctness is estimated according to the labeling confidences and agreement of two Conditional Random Fields based labelers, which are trained on the randomly and evenly split labeled data, while the informativeness is in proportion to the maximum distance between the tweet and the already selected tweets. We evaluate our method on a human-annotated data set and show that bootstrapping improves a baseline by 3.4% F1.
@article{Liu2009d,
 title = {Enhancing Semantic Role Labeling for Tweets Using Self-Training},
 type = {article},
 year = {2009},
 keywords = {natural language processing},
 pages = {896--901},
 websites = {http://www.aaai.org/ocs/index.php/AAAI/AAAI11/paper/download/3425/3965},
 url = {http://www.aaai.org/ocs/index.php/AAAI/AAAI11/paper/download/3425/3965},
 id = {d07d97f3-cb17-39df-8bf8-6d93454f769f},
 created = {2012-04-01T16:32:49.000Z},
 file_attached = {false},
 profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
 group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},
 last_modified = {2017-03-14T14:36:19.698Z},
 tags = {semantic role labeling,twitter},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false},
 citation_key = {Liu2009d},
 private_publication = {false},
 review-note = {NOTE(review): the URL path contains AAAI/AAAI11, which suggests an AAAI 2011 conference paper; year = 2009 and journal = Entropy may be wrong (possibly should be @inproceedings with a booktitle) -- TODO confirm against the publisher record before citing. This field is ignored by BibTeX styles.},
 abstract = {Semantic Role Labeling (SRL) for tweets is a meaningful task that can benefit a wide range of applications such as finegrained information extraction and retrieval from tweets. One main challenge of the task is the lack of annotated tweets, which is required to train a statistical model. We introduce self-training to SRL, leveraging abundant unlabeled tweets to alleviate its depending on annotated tweets. A novel strategy of tweet selection is presented, ensuring the chosen tweets are both correct and informative. More specifically, the correctness is estimated according to the labeling confidences and agreement of two Conditional Random Fields based labelers, which are trained on the randomly evenly spitted labeled data; while the informativeness is in proportion to the maximum distance between the tweet and the already selected tweets. We evaluate our method on a human annotated data set and show that bootstrapping improve a baseline by 3.4\% F1.},
 bibtype = {article},
 author = {Liu, Xiaohua and Li, Kuan and Zhou, Ming and Xiong, Zhongyang},
 journal = {Entropy}
}

Downloads: 0