Extracting Geospatial Entities from Wikipedia. Witmer, J. & Kalita, J. 2009 IEEE International Conference on Semantic Computing, pages 450-457. IEEE, 2009.
This paper addresses the challenge of extracting geospatial data from the article text of the English Wikipedia. In the first phase of our work, we create a training corpus and select a set of word-based features to train a Support Vector Machine (SVM) for the task of geospatial named entity recognition. We target for testing a corpus of Wikipedia articles about battles and wars, as these have a high incidence of geospatial content. The SVM recognizes place names in the corpus with a very high recall, close to 100%, with an acceptable precision. The set of geospatial NEs is then fed into a geocoding and resolution process, whose goal is to determine the correct coordinates for each place name. As many place names are ambiguous, and do not immediately geocode to a single location, we present a data structure and algorithm to resolve ambiguity based on sentence and article context, so the correct coordinates can be selected. We achieve an f-measure of 82%, and create a set of geospatial entities for each article, combining the place names, spatial locations, and an assumed point geometry. These entities can enable geospatial search on and geovisualization of Wikipedia.
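
The abstract describes a two-phase pipeline: an SVM tagger over word-based features that recognizes place names, followed by a geocoding step that resolves ambiguous names using sentence and article context. (Incidentally, if the reported 82% is the balanced F1 and recall is taken as roughly 100%, precision works out to about 2PR/(P+R) = 0.82 with R = 1, so P ≈ 70%.) The following is a minimal sketch of that pipeline, not the authors' implementation; it assumes scikit-learn's LinearSVC, and the feature set, the toy gazetteer, and the context-overlap score are illustrative assumptions.

# A minimal sketch of the pipeline described in the abstract, not the authors'
# implementation: an SVM token classifier over simple word-based features,
# then resolution of ambiguous place names by context overlap. The feature
# set, the toy gazetteer, and the overlap score are illustrative assumptions.
from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

def word_features(tokens, i):
    # Word-based features for token i, in the spirit of the paper's phase one.
    return {
        "word": tokens[i].lower(),
        "is_title": tokens[i].istitle(),
        "prev": tokens[i - 1].lower() if i > 0 else "<s>",
        "next": tokens[i + 1].lower() if i + 1 < len(tokens) else "</s>",
    }

# Stand-in training data: 1 = geospatial named entity, 0 = other.
sentences = [
    (["The", "battle", "began", "near", "Gettysburg", "in", "Pennsylvania"],
     [0, 0, 0, 0, 1, 0, 1]),
    (["Troops", "marched", "from", "Paris", "toward", "the", "coast"],
     [0, 0, 0, 1, 0, 0, 0]),
]
X = [word_features(t, i) for t, _ in sentences for i in range(len(t))]
y = [label for _, labels in sentences for label in labels]
ner = make_pipeline(DictVectorizer(), LinearSVC()).fit(X, y)

# Phase two: a hypothetical gazetteer maps a name to candidate coordinates,
# each with context terms; pick the candidate whose terms best overlap the
# sentence/article words (a crude stand-in for the paper's resolution step).
GAZETTEER = {
    "Paris": [((48.8566, 2.3522), {"france", "seine", "napoleon"}),
              ((33.6609, -95.5555), {"texas", "lamar", "usa"})],
}

def resolve(name, context_words):
    candidates = GAZETTEER.get(name)
    if not candidates:
        return None
    words = {w.lower() for w in context_words}
    return max(candidates, key=lambda c: len(c[1] & words))[0]

tokens = ["Napoleon", "returned", "to", "Paris"]
labels = ner.predict([word_features(tokens, i) for i in range(len(tokens))])
places = [t for t, lab in zip(tokens, labels) if lab == 1]
print([(p, resolve(p, tokens)) for p in places])

The set-overlap score above merely gestures at the idea; the paper presents a dedicated data structure and algorithm over sentence and article context for the actual resolution step.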
@article{Witmer2009,
 title = {Extracting Geospatial Entities from Wikipedia},
 type = {article},
 year = {2009},
 pages = {450-457},
 websites = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=5298641},
 publisher = {IEEE},
 tags = {named entities},
 citation_key = {Witmer2009},
 abstract = {This paper addresses the challenge of extracting geospatial data from the article text of the English Wikipedia. In the first phase of our work, we create a training corpus and select a set of word-based features to train a Support Vector Machine (SVM) for the task of geospatial named entity recognition. We target for testing a corpus of Wikipedia articles about battles and wars, as these have a high incidence of geospatial content. The SVM recognizes place names in the corpus with a very high recall, close to 100%, with an acceptable precision. The set of geospatial NEs is then fed into a geocoding and resolution process, whose goal is to determine the correct coordinates for each place name. As many place names are ambiguous, and do not immediately geocode to a single location, we present a data structure and algorithm to resolve ambiguity based on sentence and article context, so the correct coordinates can be selected. We achieve an f-measure of 82%, and create a set of geospatial entities for each article, combining the place names, spatial locations, and an assumed point geometry. These entities can enable geospatial search on and geovisualization of Wikipedia.},
 bibtype = {article},
 author = {Witmer, Jeremy and Kalita, Jugal},
 journal = {2009 IEEE International Conference on Semantic Computing}
}
