Characteristics of Open Data CSV Files. Mitlöhner, J., Neumaier, S., Umbrich, J., & Polleres, A. In 2nd International Conference on Open and Big Data, August, 2016. Invited paper. [Paper] [doi] [abstract] [bibtex] This work analyzes an Open Data corpus containing 200K tabular resources with a total file size of 413GB from a data consumer perspective. Our study shows that ∼10% of the resources in Open Data portals are labelled as a tabular data of which only 50% can be considered CSV files. The study inspects the general shape of these tabular data, reports on column and row distribution, analyses the availability of (multiple) header rows and if a file contains multiple tables. In addition, we inspect and analyze the table column types, detect missing values and report about the distribution of the values.
@inproceedings{mitl-etal-2016OBD,
author = {Mitl\"ohner, Johann and Neumaier, Sebastian and Umbrich, J\"urgen and Polleres, Axel},
booktitle = {2nd International Conference on Open and Big Data},
month = aug,
day = {22--24},
note = {Invited paper},
type = CONF,
abstract = {This work analyzes an Open Data corpus containing 200K tabular resources with a total file size of 413GB from a data consumer perspective. Our study shows that ∼10\% of the resources in Open Data portals are labelled as a tabular data of which only 50\% can be considered CSV files. The study inspects the general shape of these tabular data, reports on column and row distribution, analyses the availability of (multiple) header rows and if a file contains multiple tables. In addition, we inspect and analyze the table column types, detect missing values and report about the distribution of the values.},
title = {Characteristics of Open Data {CSV} Files},
year = 2016,
url = {http://polleres.net/publications/mitl-etal-2016OBD.pdf},
doi = {10.1109/OBD.2016.18},
}
Downloads: 0
{"_id":"coMzuBrgT3wHiamWq","bibbaseid":"mitlhner-neumaier-umbrich-polleres-characteristicsofopendatacsvfiles-2016","downloads":0,"creationDate":"2016-07-05T14:15:11.455Z","title":"Characteristics of Open Data CSV Files","author_short":["Mitlöhner, J.","Neumaier, S.","Umbrich, J.","Polleres, A."],"year":2016,"bibtype":"inproceedings","biburl":"www.polleres.net/mypublications.bib","bibdata":{"bibtype":"inproceedings","type":"Conference paper","author":[{"propositions":[],"lastnames":["Mitlöhner"],"firstnames":["Johann"],"suffixes":[]},{"propositions":[],"lastnames":["Neumaier"],"firstnames":["Sebastian"],"suffixes":[]},{"propositions":[],"lastnames":["Umbrich"],"firstnames":["Jürgen"],"suffixes":[]},{"propositions":[],"lastnames":["Polleres"],"firstnames":["Axel"],"suffixes":[]}],"booktitle":"2nd International Conference on Open and Big Data","month":"August","day":"22–24","note":"Invited paper","abstract":"This work analyzes an Open Data corpus containing 200K tabular resources with a total file size of 413GB from a data consumer perspective. Our study shows that ∼10% of the resources in Open Data portals are labelled as a tabular data of which only 50% can be considered CSV files. The study inspects the general shape of these tabular data, reports on column and row distribution, analyses the availability of (multiple) header rows and if a file contains multiple tables. 
In addition, we inspect and analyze the table column types, detect missing values and report about the distribution of the values.","title":"Characteristics of Open Data CSV Files","year":"2016","url":"http://polleres.net/publications/mitl-etal-2016OBD.pdf","doi":"https://doi.org/10.1109/OBD.2016.18","bibtex":"@inproceedings{mitl-etal-2016OBD,\n\t author = {Mitl\\\"ohner, Johann and Neumaier, Sebastian and Umbrich, J\\\"urgen and Polleres, Axel},\n\t booktitle = {2nd International Conference on Open and Big Data},\n\t month = aug,\n day = {22--24},\n\t note = {Invited paper},\n type = CONF,\n abstract = {This work analyzes an Open Data corpus containing 200K tabular resources with a total file size of 413GB from a data consumer perspective. Our study shows that ∼10\\% of the resources in Open Data portals are labelled as a tabular data of which only 50\\% can be considered CSV files. The study inspects the general shape of these tabular data, reports on column and row distribution, analyses the availability of (multiple) header rows and if a file contains multiple tables. 
In addition, we inspect and analyze the table column types, detect missing values and report about the distribution of the values.},\n\t title = {Characteristics of Open Data {CSV} Files},\n\t year = 2016,\n url = {http://polleres.net/publications/mitl-etal-2016OBD.pdf},\n\t doi = {https://doi.org/10.1109/OBD.2016.18},\n}\n\n","author_short":["Mitlöhner, J.","Neumaier, S.","Umbrich, J.","Polleres, A."],"key":"mitl-etal-2016OBD","id":"mitl-etal-2016OBD","bibbaseid":"mitlhner-neumaier-umbrich-polleres-characteristicsofopendatacsvfiles-2016","role":"author","urls":{"Paper":"http://polleres.net/publications/mitl-etal-2016OBD.pdf"},"metadata":{"authorlinks":{"polleres, a":"https://bibbase.org/show?bib=www.polleres.net/mypublications.bib"}},"downloads":0,"html":""},"search_terms":["characteristics","open","data","csv","files","mitlöhner","neumaier","umbrich","polleres"],"keywords":[],"authorIDs":["545720922abc8e9f370000ae","5PFMiHGwfvbGBZwWF","5de7280d97054edf010000c3","5e02b1a419da8edf01000028","5e048450db7916df010000b1","5e06d565a0810cde0100009b","5e10e27445c12cde01000062","5e123345c196d3de01000074","5e14ba61e55ed8de01000072","5e189b4e779abfdf0100013f","5e216f7e5a651cdf010000eb","5e25b9fdf299d4de01000001","5e2d64605e7586df01000083","5e36e5e9b26a0fde0100005e","5e37d23b56571fde010000de","5e4ded1052c311f20100018e","5e51a3102793ecde010000e0","5e59a6b5ad6c7fde01000114","5e5d588ead47bcde01000072","5e60e857839e59df010000f1","A5AFuDAiNR4HEYiFD","BtzwZ6TFPsASbdqvo","DLdeXAmrbA4niYQzH","FyLDFGg993nDS2Spf","NCjPvWahWRjdP3ghB","XcyP3jptz7zE4ZLws","aiXjXMLP63k5WCt84","fTDcT5K3oSTcdxSBj","fbKNfWffDzdzubrER","haaAs2rQaQA7EaZva","nQX2P8WzFeKwcpLqd","nuWuyLnGu7YzMrn4d","pfENTBFWo85mRy3ik","rX6EShFR2rMFmQL2C","w6wHZukTjqqera7BR","woa42kCD35yCmdQTj","yPgvarsL7KAT9yfZd","yzkCNJMYNL8B3bni2","zDG3tj87ZfYXo7u9c"],"dataSources":["cBfwyqsLFQQMc4Fss","gixxkiKt6rtWGoKSh","QfLT6siHZuHw9MqvK"]}