The Danish Gigaword Corpus. Strømberg-Derczynski, L., Ciosici, M., Baglini, R., Christiansen, M. H., Dalsgaard, J. A., Fusaroli, R., Henrichsen, P. J., Hvingelby, R., Kirkedal, A., Kjeldsen, A. S., Ladefoged, C., Nielsen, F. Å., Madsen, J., Petersen, M. L., Rystrøm, J. H., & Varab, D. In Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa), pages 413–421, Reykjavik, Iceland (Online), May 31–June 2, 2021. Linköping University Electronic Press, Sweden. Paper abstract bibtex Danish language technology has been hindered by a lack of broad-coverage corpora at the scale modern NLP prefers. This paper describes the Danish Gigaword Corpus, the result of a focused effort to provide a diverse and freely-available one billion word corpus of Danish text. The Danish Gigaword corpus covers a wide array of time periods, domains, speakers' socio-economic status, and Danish dialects.
@inproceedings{stromberg-derczynski-etal-2021-danish,
abstract = {Danish language technology has been hindered by a lack of broad-coverage corpora at the scale modern NLP prefers. This paper describes the Danish Gigaword Corpus, the result of a focused effort to provide a diverse and freely-available one billion word corpus of Danish text. The Danish Gigaword corpus covers a wide array of time periods, domains, speakers{'} socio-economic status, and Danish dialects.},
address = {Reykjavik, Iceland (Online)},
author = {Str{\o}mberg-Derczynski, Leon and Ciosici, Manuel and Baglini, Rebekah and Christiansen, Morten H. and Dalsgaard, Jacob Aarup and Fusaroli, Riccardo and Henrichsen, Peter Juel and Hvingelby, Rasmus and Kirkedal, Andreas and Kjeldsen, Alex Speed and Ladefoged, Claus and Nielsen, Finn {\AA}rup and Madsen, Jens and Petersen, Malte Lau and Rystr{\o}m, Jonathan Hvithamar and Varab, Daniel},
booktitle = {Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)},
month = may # { 31--2 } # jun,
pages = {413--421},
publisher = {Link{\"o}ping University Electronic Press, Sweden},
title = {The {D}anish {G}igaword Corpus},
url = {https://aclanthology.org/2021.nodalida-main.46},
year = {2021},
bdsk-url-1 = {https://aclanthology.org/2021.nodalida-main.46}}
Downloads: 0
{"_id":"jWSh5iN6g9m9PAnCi","bibbaseid":"strmbergderczynski-ciosici-baglini-christiansen-dalsgaard-fusaroli-henrichsen-hvingelby-etal-thedanishgigawordcorpus-2021","author_short":["Strømberg-Derczynski, L.","Ciosici, M.","Baglini, R.","Christiansen, M. H.","Dalsgaard, J. A.","Fusaroli, R.","Henrichsen, P. J.","Hvingelby, R.","Kirkedal, A.","Kjeldsen, A. S.","Ladefoged, C.","Nielsen, F. Å.","Madsen, J.","Petersen, M. L.","Rystrøm, J. H.","Varab, D."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","abstract":"Danish language technology has been hindered by a lack of broad-coverage corpora at the scale modern NLP prefers. This paper describes the Danish Gigaword Corpus, the result of a focused effort to provide a diverse and freely-available one billion word corpus of Danish text. The Danish Gigaword corpus covers a wide array of time periods, domains, speakers' socio-economic status, and Danish dialects.","address":"Reykjavik, Iceland (Online)","author":[{"propositions":[],"lastnames":["Strømberg-Derczynski"],"firstnames":["Leon"],"suffixes":[]},{"propositions":[],"lastnames":["Ciosici"],"firstnames":["Manuel"],"suffixes":[]},{"propositions":[],"lastnames":["Baglini"],"firstnames":["Rebekah"],"suffixes":[]},{"propositions":[],"lastnames":["Christiansen"],"firstnames":["Morten","H."],"suffixes":[]},{"propositions":[],"lastnames":["Dalsgaard"],"firstnames":["Jacob","Aarup"],"suffixes":[]},{"propositions":[],"lastnames":["Fusaroli"],"firstnames":["Riccardo"],"suffixes":[]},{"propositions":[],"lastnames":["Henrichsen"],"firstnames":["Peter","Juel"],"suffixes":[]},{"propositions":[],"lastnames":["Hvingelby"],"firstnames":["Rasmus"],"suffixes":[]},{"propositions":[],"lastnames":["Kirkedal"],"firstnames":["Andreas"],"suffixes":[]},{"propositions":[],"lastnames":["Kjeldsen"],"firstnames":["Alex","Speed"],"suffixes":[]},{"propositions":[],"lastnames":["Ladefoged"],"firstnames":["Claus"],"suffixes":[]},{"propositions":[],"lastnames":["Nielsen"],"firstnames":["Finn",
"Årup"],"suffixes":[]},{"propositions":[],"lastnames":["Madsen"],"firstnames":["Jens"],"suffixes":[]},{"propositions":[],"lastnames":["Petersen"],"firstnames":["Malte","Lau"],"suffixes":[]},{"propositions":[],"lastnames":["Rystrøm"],"firstnames":["Jonathan","Hvithamar"],"suffixes":[]},{"propositions":[],"lastnames":["Varab"],"firstnames":["Daniel"],"suffixes":[]}],"booktitle":"Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)","month":"May31–2 June","pages":"413–421","publisher":"Linköping University Electronic Press, Sweden","title":"The Danish Gigaword Corpus","url":"https://aclanthology.org/2021.nodalida-main.46","year":"2021","bdsk-url-1":"https://aclanthology.org/2021.nodalida-main.46","bibtex":"@inproceedings{stromberg-derczynski-etal-2021-danish,\n\tabstract = {Danish language technology has been hindered by a lack of broad-coverage corpora at the scale modern NLP prefers. This paper describes the Danish Gigaword Corpus, the result of a focused effort to provide a diverse and freely-available one billion word corpus of Danish text. The Danish Gigaword corpus covers a wide array of time periods, domains, speakers{'} socio-economic status, and Danish dialects.},\n\taddress = {Reykjavik, Iceland (Online)},\n\tauthor = {Str{\\o}mberg-Derczynski, Leon and Ciosici, Manuel and Baglini, Rebekah and Christiansen, Morten H. and Dalsgaard, Jacob Aarup and Fusaroli, Riccardo and Henrichsen, Peter Juel and Hvingelby, Rasmus and Kirkedal, Andreas and Kjeldsen, Alex Speed and Ladefoged, Claus and Nielsen, Finn {\\AA}rup and Madsen, Jens and Petersen, Malte Lau and Rystr{\\o}m, Jonathan Hvithamar and Varab, Daniel},\n\tbooktitle = {Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)},\n\tmonth = may # { 31--2 } # jun,\n\tpages = {413--421},\n\tpublisher = {Link{\\\"o}ping University Electronic Press, Sweden},\n\ttitle = {The {D}anish {G}igaword Corpus},\n\turl = {https://aclanthology.org/2021.nodalida-main.46},\n\tyear = {2021},\n\tbdsk-url-1 = {https://aclanthology.org/2021.nodalida-main.46}}\n\n","author_short":["Strømberg-Derczynski, L.","Ciosici, M.","Baglini, R.","Christiansen, M. H.","Dalsgaard, J. A.","Fusaroli, R.","Henrichsen, P. J.","Hvingelby, R.","Kirkedal, A.","Kjeldsen, A. S.","Ladefoged, C.","Nielsen, F. Å.","Madsen, J.","Petersen, M. L.","Rystrøm, J. H.","Varab, D."],"bibbaseid":"strmbergderczynski-ciosici-baglini-christiansen-dalsgaard-fusaroli-henrichsen-hvingelby-etal-thedanishgigawordcorpus-2021","role":"author","urls":{"Paper":"https://aclanthology.org/2021.nodalida-main.46"},"metadata":{"authorlinks":{}}},"bibtype":"inproceedings","biburl":"https://bibbase.org/f/rgWMwNyg47s6MAuEJ/manuelc-2023.bib","dataSources":["bNHGR4jWCTLTnHxvM"],"keywords":[],"search_terms":["danish","gigaword","corpus","strømberg-derczynski","ciosici","baglini","christiansen","dalsgaard","fusaroli","henrichsen","hvingelby","kirkedal","kjeldsen","ladefoged","nielsen","madsen","petersen","rystrøm","varab"],"title":"The Danish Gigaword Corpus","year":2021}