Spatial Relation Based Object Extraction from the World Wide Web. Jingmin, H. & Lejian, L. 2008 IEEE/WIC/ACM International Conference on Web Intelligence and Intelligent Agent Technology, IEEE, 2008.
Spatial Relation Based Object Extraction from the World Wide Web [link]Website  abstract   bibtex   
The statistical results of observations show that regular spatial distribution characteristics exist for Web information about objects of the same type across different Web sites. The spatial distance between components within one object is always less than that between different objects. A novel method based on spatial configuration of Web document to extract object from the World Wide Web is presented. It demonstrates a fully automatic bottom-up process of object extraction. This method primarily considers the distribution characteristic of Web information and is independent of underlying documentation representation, such as HTML code. Experiments show that the proposed method can work well even when the HTML structure is far different from layout structure, and the results are encouraging.
% Conference paper: recorded as an inproceedings entry with the venue in
% booktitle rather than journal. The citation key is the value of the
% exporter's citation_key field. The fields from id through
% private_publication, plus type and bibtype, are reference-manager metadata
% that unknown-field-tolerant styles ignore; they are kept unchanged.
% NOTE(review): the author names may have family and given names swapped
% (possibly "Hao, Jingmin and Liao, Lejian") -- confirm against the
% publisher's record before changing the author field.
@inproceedings{Jingmin2008,
 author = {Jingmin, Hao and Lejian, Liao},
 title = {Spatial Relation Based Object Extraction from the {World Wide Web}},
 booktitle = {2008 {IEEE/WIC/ACM} International Conference on Web Intelligence and Intelligent Agent Technology},
 year = {2008},
 pages = {94--97},
 publisher = {IEEE},
 url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4740735},
 type = {article},
 id = {e4e73d5f-ca5b-3695-a47f-adb007c7d8ad},
 created = {2012-02-09T21:39:35.000Z},
 file_attached = {false},
 profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
 group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},
 last_modified = {2017-03-14T14:36:19.698Z},
 read = {false},
 starred = {false},
 authored = {false},
 confirmed = {true},
 hidden = {false},
 citation_key = {Jingmin2008},
 private_publication = {false},
 abstract = {The statistical results of observations show that regular spatial distribution characteristics exist for Web information about objects of the same type across different Web sites. The spatial distance between components within one object is always less than that between different objects. A novel method based on spatial configuration of Web document to extract object from the World Wide Web is presented. It demonstrates a fully automatic bottom-up process of object extraction. This method primarily considers the distribution characteristic of Web information and is independent of underlying documentation representation, such as HTML code. Experiments show that the proposed method can work well even when the HTML structure is far different from layout structure, and the results are encouraging.},
 bibtype = {article}
}

Downloads: 0