KoKo: An L1 Learner Corpus for German. Abel, A., Glaznieks, A., Nicolas, L., & Stemle, E. W. In Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), pages 2414–2421, May 2014. European Language Resources Association (ELRA).
abstract   bibtex   
We introduce the KoKo corpus, a collection of German L1 learner texts annotated with learner errors, along with the methods and tools used in its construction and evaluation. The corpus contains both texts and corresponding survey information from 1,319 pupils and amounts to around 716,000 tokens. The evaluation of the quality of the performed transcriptions and annotations shows an accuracy of orthographic error annotations of approximately 80% as well as high accuracy of transcriptions (> 99%), automatic tokenisation (> 99%), sentence splitting (> 96%) and POS-tagging (> 94%). The KoKo corpus will be published at the end of 2014 and be the first accessible linguistically annotated German L1 learner corpus. It will represent a valuable source for research and teaching on German as L1 language, in particular with regards to writing skills.
% Conference paper (LREC 2014). The entry key is taken from this record's own
% citation_key field. The former identifiers/editors fields were dropped because
% they held only "[object Object]" export placeholders and no actual data.
@inproceedings{ABEL14.934,
 author = {Abel, Andrea and Glaznieks, Aivars and Nicolas, Lionel and Stemle, Egon W.},
 title = {{KoKo}: An {L1} Learner Corpus for {German}},
 booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
 type = {inProceedings},
 year = {2014},
 month = may,
 pages = {2414--2421},
 publisher = {European Language Resources Association (ELRA)},
 city = {Reykjavik, Iceland},
 url = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/934_Paper.pdf},
 keywords = {German Language,Learner Corpora},
 id = {00af6c6a-3ad9-36c6-a6e1-8ca4d9dd2bf8},
 created = {2015-09-12T17:37:41.000Z},
 file_attached = {false},
 profile_id = {94be607e-c433-369f-af1e-99020f3fddbe},
 group_id = {336b6408-ac60-3ac5-acc3-f17c991bd54f},
 last_modified = {2015-09-23T10:56:35.000Z},
 tags = {LT,Multiling},
 read = {true},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false},
 citation_key = {ABEL14.934},
 user_context = {article},
 abstract = {We introduce the KoKo corpus, a collection of German L1 learner texts annotated with learner errors, along with the methods and tools used in its construction and evaluation. The corpus contains both texts and corresponding survey information from 1,319 pupils and amounts to around 716,000 tokens. The evaluation of the quality of the performed transcriptions and annotations shows an accuracy of orthographic error annotations of approximately 80\% as well as high accuracy of transcriptions (> 99\%), automatic tokenisation (> 99\%), sentence splitting (> 96\%) and POS-tagging (> 94\%). The KoKo corpus will be published at the end of 2014 and be the first accessible linguistically annotated German L1 learner corpus. It will represent a valuable source for research and teaching on German as L1 language, in particular with regards to writing skills.},
 bibtype = {inProceedings}
}

Downloads: 0