ODArchive - Creating an archive for structured data from Open Data Portals. Weber, T., Mitlöhner, J., Neumaier, S., & Polleres, A. In Proceedings of the 19th International Semantic Web Conference (ISWC 2020), volume 12507 of Lecture Notes in Computer Science (LNCS), pages 311–327, Virtual Conference (Athens, Greece), November, 2020. Springer. Paper doi abstract bibtex We present ODArchive, a large corpus of structured data collected from over 260 Open Data portals worldwide, alongside with curated, integrated metadata. Furthermore, we enrich the harvested datasets by heuristic annotations using the type hierarchies in existing Knowledge Graphs. We both (i) present the underlying distributed architecture to scale up regular harvesting and monitoring changes on these portals, and (ii) make the corpus available via different APIs. Moreover, we (iii) analyse the characteristics of tabular data within the corpus. Our APIs can be used to regularly run such analyses or to reproduce experiments from the literature that have worked on static, not publicly available corpora.
@inproceedings{webe-etal-2020ISWC,
author = {Thomas Weber and Johann Mitl\"ohner and Sebastian Neumaier and Axel Polleres},
title = {ODArchive - Creating an archive for structured data from Open Data Portals},
abstract = {We present ODArchive, a large corpus of structured data collected from over 260 Open Data portals worldwide, alongside with curated, integrated metadata. Furthermore, we enrich the harvested datasets by heuristic annotations using the type hierarchies in existing Knowledge Graphs. We both (i) present the underlying distributed architecture to scale up regular harvesting and monitoring changes on these portals, and (ii) make the corpus available via different APIs. Moreover, we (iii) analyse the characteristics of tabular data within the corpus. Our APIs can be used to regularly run such analyses or to reproduce experiments from the literature that have worked on static, not publicly available corpora.},
month = nov,
day = {2--6},
year = 2020,
booktitle = {Proceedings of the 19th International Semantic Web Conference (ISWC 2020)},
address = {Virtual Conference (Athens, Greece)},
series = LNCS,
volume = 12507,
pages={311--327},
publisher = {Springer},
url = {http://polleres.net/publications/webe-etal-2020ISWC.pdf},
doi = {https://doi.org/10.1007/978-3-030-62466-8_20}
}
Downloads: 0
{"_id":"GNe54TZv4gd5cK2Wt","bibbaseid":"weber-mitlhner-neumaier-polleres-odarchivecreatinganarchiveforstructureddatafromopendataportals-2020","authorIDs":["FyLDFGg993nDS2Spf"],"author_short":["Weber, T.","Mitlöhner, J.","Neumaier, S.","Polleres, A."],"bibdata":{"bibtype":"inproceedings","type":"inproceedings","author":[{"firstnames":["Thomas"],"propositions":[],"lastnames":["Weber"],"suffixes":[]},{"firstnames":["Johann"],"propositions":[],"lastnames":["Mitlöhner"],"suffixes":[]},{"firstnames":["Sebastian"],"propositions":[],"lastnames":["Neumaier"],"suffixes":[]},{"firstnames":["Axel"],"propositions":[],"lastnames":["Polleres"],"suffixes":[]}],"title":"ODArchive - Creating an archive for structured data from Open Data Portals","abstract":"We present ODArchive, a large corpus of structured data collected from over 260 Open Data portals worldwide, alongside with curated, integrated metadata. Furthermore, we enrich the harvested datasets by heuristic annotations using the type hierarchies in existing Knowledge Graphs. We both (i) present the underlying distributed architecture to scale up regular harvesting and monitoring changes on these portals, and (ii) make the corpus available via different APIs. Moreover, we (iii) analyse the characteristics of tabular data within the corpus. \nOur APIs can be used to regularly run such analyses or to reproduce experiments from the literature that have worked on static, not publicly available corpora.","month":"November","day":"2–6","year":"2020","booktitle":"Proceedings of the 19th International Semantic Web Conference (ISWC 2020)","address":"Virtual Conference (Athens, Greece)","series":"Lecture Notes in Computer Science (LNCS)","volume":"12507","pages":"311–327","publisher":"Springer","url":"http://polleres.net/publications/webe-etal-2020ISWC.pdf","doi":"https://doi.org/10.1007/978-3-030-62466-8_20","bibtex":"@inproceedings{webe-etal-2020ISWC,\n author = {Thomas Weber and Johann\tMitl\\\"ohner and Sebastian Neumaier and Axel Polleres},\n title = {ODArchive - Creating an archive for structured data from Open Data Portals},\n abstract = {We present ODArchive, a large corpus of structured data collected from over 260 Open Data portals worldwide, alongside with curated, integrated metadata. Furthermore, we enrich the harvested datasets by heuristic annotations using the type hierarchies in existing Knowledge Graphs. We both (i) present the underlying distributed architecture to scale up regular harvesting and monitoring changes on these portals, and (ii) make the corpus available via different APIs. Moreover, we (iii) analyse the characteristics of tabular data within the corpus. \nOur APIs can be used to regularly run such analyses or to reproduce experiments from the literature that have worked on static, not publicly available corpora.},\n month = nov,\n day = {2--6},\n year = 2020,\n booktitle = {Proceedings of the 19th International Semantic Web Conference (ISWC 2020)},\n address = {Virtual Conference (Athens, Greece)},\n series = LNCS,\n volume = 12507,\n pages={311--327},\n publisher = {Springer},\n url = {http://polleres.net/publications/webe-etal-2020ISWC.pdf},\n doi = {https://doi.org/10.1007/978-3-030-62466-8_20}\n}\n\n","author_short":["Weber, T.","Mitlöhner, J.","Neumaier, S.","Polleres, A."],"key":"webe-etal-2020ISWC","id":"webe-etal-2020ISWC","bibbaseid":"weber-mitlhner-neumaier-polleres-odarchivecreatinganarchiveforstructureddatafromopendataportals-2020","role":"author","urls":{"Paper":"http://polleres.net/publications/webe-etal-2020ISWC.pdf"},"metadata":{"authorlinks":{"polleres, a":"https://bibbase.org/show?bib=www.polleres.net/mypublications.bib"}},"downloads":0,"html":""},"bibtype":"inproceedings","biburl":"www.polleres.net/mypublications.bib","creationDate":"2020-08-17T23:25:01.895Z","downloads":0,"keywords":[],"search_terms":["odarchive","creating","archive","structured","data","open","data","portals","weber","mitlöhner","neumaier","polleres"],"title":"ODArchive - Creating an archive for structured data from Open Data Portals","year":2020,"dataSources":["cBfwyqsLFQQMc4Fss","gixxkiKt6rtWGoKSh","QfLT6siHZuHw9MqvK"]}