Extracting Geospatial Entities from Wikipedia. Witmer, J. & Kalita, J. 2009 IEEE International Conference on Semantic Computing, pages 450-457. IEEE, 2009.
This paper addresses the challenge of extracting geospatial data from the article text of the English Wikipedia. In the first phase of our work, we create a training corpus and select a set of word-based features to train a Support Vector Machine (SVM) for the task of geospatial named entity recognition. We target for testing a corpus of Wikipedia articles about battles and wars, as these have a high incidence of geospatial content. The SVM recognizes place names in the corpus with a very high recall, close to 100%, with an acceptable precision. The set of geospatial NEs is then fed into a geocoding and resolution process, whose goal is to determine the correct coordinates for each place name. As many place names are ambiguous, and do not immediately geocode to a single location, we present a data structure and algorithm to resolve ambiguity based on sentence and article context, so the correct coordinates can be selected. We achieve an f-measure of 82%, and create a set of geospatial entities for each article, combining the place names, spatial locations, and an assumed point geometry. These entities can enable geospatial search on and geovisualization of Wikipedia.
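
The abstract describes a two-phase pipeline: an SVM tagger over word-based features that recognizes place names, followed by a geocoding step that resolves ambiguous names using sentence and article context. (Incidentally, if the reported 82% is the balanced F1 and recall is taken as roughly 100%, precision works out to about 2PR/(P+R) = 0.82 with R = 1, so P ≈ 70%.) The following is a minimal sketch of that pipeline, not the authors' implementation; it assumes scikit-learn's LinearSVC, and the feature set, the toy gazetteer, and the context-overlap score are illustrative assumptions.

# A minimal sketch of the pipeline described in the abstract, not the authors'
# implementation: an SVM token classifier over simple word-based features,
# then resolution of ambiguous place names by context overlap. The feature
# set, the toy gazetteer, and the overlap score are illustrative assumptions.
from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

def word_features(tokens, i):
    # Word-based features for token i, in the spirit of the paper's phase one.
    return {
        "word": tokens[i].lower(),
        "is_title": tokens[i].istitle(),
        "prev": tokens[i - 1].lower() if i > 0 else "<s>",
        "next": tokens[i + 1].lower() if i + 1 < len(tokens) else "</s>",
    }

# Stand-in training data: 1 = geospatial named entity, 0 = other.
sentences = [
    (["The", "battle", "began", "near", "Gettysburg", "in", "Pennsylvania"],
     [0, 0, 0, 0, 1, 0, 1]),
    (["Troops", "marched", "from", "Paris", "toward", "the", "coast"],
     [0, 0, 0, 1, 0, 0, 0]),
]
X = [word_features(t, i) for t, _ in sentences for i in range(len(t))]
y = [label for _, labels in sentences for label in labels]
ner = make_pipeline(DictVectorizer(), LinearSVC()).fit(X, y)

# Phase two: a hypothetical gazetteer maps a name to candidate coordinates,
# each with context terms; pick the candidate whose terms best overlap the
# sentence/article words (a crude stand-in for the paper's resolution step).
GAZETTEER = {
    "Paris": [((48.8566, 2.3522), {"france", "seine", "napoleon"}),
              ((33.6609, -95.5555), {"texas", "lamar", "usa"})],
}

def resolve(name, context_words):
    candidates = GAZETTEER.get(name)
    if not candidates:
        return None
    words = {w.lower() for w in context_words}
    return max(candidates, key=lambda c: len(c[1] & words))[0]

tokens = ["Napoleon", "returned", "to", "Paris"]
labels = ner.predict([word_features(tokens, i) for i in range(len(tokens))])
places = [t for t, lab in zip(tokens, labels) if lab == 1]
print([(p, resolve(p, tokens)) for p in places])

The set-overlap score above merely gestures at the idea; the paper presents a dedicated data structure and algorithm over sentence and article context for the actual resolution step.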
@article{Witmer2009,
 title = {Extracting Geospatial Entities from Wikipedia},
 type = {article},
 year = {2009},
 pages = {450-457},
 websites = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=5298641},
 publisher = {IEEE},
 tags = {named entities},
 citation_key = {Witmer2009},
 abstract = {This paper addresses the challenge of extracting geospatial data from the article text of the English Wikipedia. In the first phase of our work, we create a training corpus and select a set of word-based features to train a Support Vector Machine (SVM) for the task of geospatial named entity recognition. We target for testing a corpus of Wikipedia articles about battles and wars, as these have a high incidence of geospatial content. The SVM recognizes place names in the corpus with a very high recall, close to 100%, with an acceptable precision. The set of geospatial NEs is then fed into a geocoding and resolution process, whose goal is to determine the correct coordinates for each place name. As many place names are ambiguous, and do not immediately geocode to a single location, we present a data structure and algorithm to resolve ambiguity based on sentence and article context, so the correct coordinates can be selected. We achieve an f-measure of 82%, and create a set of geospatial entities for each article, combining the place names, spatial locations, and an assumed point geometry. These entities can enable geospatial search on and geovisualization of Wikipedia.},
 bibtype = {article},
 author = {Witmer, Jeremy and Kalita, Jugal},
 journal = {2009 IEEE International Conference on Semantic Computing}
}
