Robust Feature Selection Using Ensemble Feature Selection Techniques. Saeys, Y., Abeel, T., & Van de Peer, Y. Journal of Biomedical Informatics, 42(5):313-325, Springer, 2008.
Robust Feature Selection Using Ensemble Feature Selection Techniques [link]Website  abstract   bibtex   
Named entity recognition is an extremely important and fundamental task of biomedical text mining. Biomedical named entities include mentions of proteins, genes, DNA, RNA, etc which often have complex structures, but it is challenging to identify and classify such entities. Machine learning methods like CRF, MEMM and SVM have been widely used for learning to recognize such entities from an annotated corpus. The identification of appropriate feature templates and the selection of the important feature values play a very important role in the success of these methods. In this paper, we provide a study on word clustering and selection based feature reduction approaches for named entity recognition using a maximum entropy classifier. The identification and selection of features are largely done automatically without using domain knowledge. The performance of the system is found to be superior to existing systems which do not use domain knowledge.
@article{Saeys2008,
  author = {Saeys, Yvan and Abeel, Thomas and {Van de Peer}, Yves},
  title = {Robust Feature Selection Using Ensemble Feature Selection Techniques},
  journal = {Journal of Biomedical Informatics},
  volume = {42},
  number = {5},
  pages = {313--325},
  year = {2008},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  chapter = {21},
  websites = {http://www.ncbi.nlm.nih.gov/pubmed/19908156},
  type = {article},
  bibtype = {article},
  citation_key = {Saeys2008},
  review_note = {NOTE(review): the abstract and tags describe biomedical named entity recognition and do not match the title. The series, chapter and publisher fields suggest a book-series chapter, while journal, volume and number describe a journal article. Two records may have been merged; verify every field against the publisher record before citing.},
  id = {ac01cfa7-c461-3018-876e-71b79dee8549},
  created = {2011-12-28T07:04:55.000Z},
  file_attached = {false},
  profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
  group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},
  last_modified = {2017-03-14T14:36:19.698Z},
  tags = {named entities},
  read = {false},
  starred = {false},
  authored = {false},
  confirmed = {true},
  hidden = {false},
  private_publication = {false},
  abstract = {Named entity recognition is an extremely important and fundamental task of biomedical text mining. Biomedical named entities include mentions of proteins, genes, DNA, RNA, etc which often have complex structures, but it is challenging to identify and classify such entities. Machine learning methods like CRF, MEMM and SVM have been widely used for learning to recognize such entities from an annotated corpus. The identification of appropriate feature templates and the selection of the important feature values play a very important role in the success of these methods. In this paper, we provide a study on word clustering and selection based feature reduction approaches for named entity recognition using a maximum entropy classifier. The identification and selection of features are largely done automatically without using domain knowledge. The performance of the system is found to be superior to existing systems which do not use domain knowledge.},
}

Downloads: 0