Wrapping Web Data into XML. Han, W., Buttler, D., & Pu, C. Sigmod Record, 30(3):33-38, ACM Press, 2001. Website abstract bibtex The vast majority of information that is available online, and coming online in this near future is only available in HTML. In order to use this information for more than human browsing, it must be converted into a machine-readable format. Wrappers have been the key tool to make the conversion from HTML into semantically meaningful and well-structured XML data. However, developing wrappers is slow and tedious work with typically brittle results. This paper describes XWRAP Elite, a tool to automatically generate robust wrappers, which breaks down the conversion process into three procedures: discovering where the data is located in an HTML page and separating the data into individual objects; decomposing objects into data elements; marking objects and elements in an output format. XWRAP Elite automates the first two procedures and requires minimal human involvement in marking output data. In addition, there is a code generation component to package all of the pieces into a stand-alone wrapper.
@comment{Journal article in SIGMOD Record 30(3), 2001. The fields listed after
url (type, bibtype, websites, id, created, ...) are reference-manager export
metadata; they are not standard BibTeX fields and are ignored by styles.}
@article{Han2001,
  author              = {Han, Wei and Buttler, David and Pu, Calton},
  title               = {Wrapping {Web} Data into {XML}},
  journal             = {{SIGMOD} Record},
  volume              = {30},
  number              = {3},
  pages               = {33--38},
  year                = {2001},
  publisher           = {ACM Press},
  url                 = {http://dl.acm.org/citation.cfm?id=603867.603873},
  abstract            = {The vast majority of information that is available online, and coming online in this near future is only available in HTML. In order to use this information for more than human browsing, it must be converted into a machine-readable format. Wrappers have been the key tool to make the conversion from HTML into semantically meaningful and well-structured XML data. However, developing wrappers is slow and tedious work with typically brittle results. This paper describes XWRAP Elite, a tool to automatically generate robust wrappers, which breaks down the conversion process into three procedures: discovering where the data is located in an HTML page and separating the data into individual objects; decomposing objects into data elements; marking objects and elements in an output format. XWRAP Elite automates the first two procedures and requires minimal human involvement in marking output data. In addition, there is a code generation component to package all of the pieces into a stand-alone wrapper.},
  type                = {article},
  bibtype             = {article},
  websites            = {http://dl.acm.org/citation.cfm?id=603867.603873},
  tags                = {web data extraction},
  citation_key        = {Han2001},
  id                  = {5590cde7-b69a-39fd-985c-d8eacf256532},
  created             = {2012-12-24T15:02:36.000Z},
  last_modified       = {2017-03-14T14:36:19.698Z},
  profile_id          = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},
  group_id            = {066b42c8-f712-3fc3-abb2-225c158d2704},
  file_attached       = {false},
  read                = {false},
  starred             = {false},
  authored            = {false},
  confirmed           = {true},
  hidden              = {false},
  private_publication = {false},
}
Downloads: 0
{"_id":"gWX95dFYJL5MBvcGi","bibbaseid":"han-buttler-pu-wrappingwebdataintoxml-2001","authorIDs":[],"author_short":["Han, W.","Buttler, D.","Pu, C."],"bibdata":{"title":"Wrapping Web Data into XML","type":"article","year":"2001","identifiers":"[object Object]","pages":"33-38","volume":"30","websites":"http://dl.acm.org/citation.cfm?id=603867.603873","publisher":"ACM Press","id":"5590cde7-b69a-39fd-985c-d8eacf256532","created":"2012-12-24T15:02:36.000Z","file_attached":false,"profile_id":"5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6","group_id":"066b42c8-f712-3fc3-abb2-225c158d2704","last_modified":"2017-03-14T14:36:19.698Z","tags":"web data extraction","read":false,"starred":false,"authored":false,"confirmed":"true","hidden":false,"citation_key":"Han2001","private_publication":false,"abstract":"The vast majority of information that is available on- line, and coming online in this near future is only avail- able in HTML. In order to use this information for more than human browsing, it must be converted into a machine-readable format. Wrappers have been the key tool to make the conversion from HTML into se- mantically meaningful and well-structured XML data. However, developing wrappers is slow and tedious work with typically brittle results. This paper de- scribes XWRAP Elite, a tool to automatically gen- erate robust wrappers, which breaks down the conver- sion process into three procedures: discovering where the data is located in an HTML page and separating the data into individual objects; decomposing objects into data elements; marking objects and elements in an output format. XWRAP Elite automates the rst two procedures and requires minimal human involve- ment in marking output data. 
In addition, there is a code generation component to package all of the pieces into a stand-alone wrapper.","bibtype":"article","author":"Han, Wei and Buttler, David and Pu, Calton","journal":"Sigmod Record","number":"3","bibtex":"@article{\n title = {Wrapping Web Data into XML},\n type = {article},\n year = {2001},\n identifiers = {[object Object]},\n pages = {33-38},\n volume = {30},\n websites = {http://dl.acm.org/citation.cfm?id=603867.603873},\n publisher = {ACM Press},\n id = {5590cde7-b69a-39fd-985c-d8eacf256532},\n created = {2012-12-24T15:02:36.000Z},\n file_attached = {false},\n profile_id = {5284e6aa-156c-3ce5-bc0e-b80cf09f3ef6},\n group_id = {066b42c8-f712-3fc3-abb2-225c158d2704},\n last_modified = {2017-03-14T14:36:19.698Z},\n tags = {web data extraction},\n read = {false},\n starred = {false},\n authored = {false},\n confirmed = {true},\n hidden = {false},\n citation_key = {Han2001},\n private_publication = {false},\n abstract = {The vast majority of information that is available on- line, and coming online in this near future is only avail- able in HTML. In order to use this information for more than human browsing, it must be converted into a machine-readable format. Wrappers have been the key tool to make the conversion from HTML into se- mantically meaningful and well-structured XML data. However, developing wrappers is slow and tedious work with typically brittle results. This paper de- scribes XWRAP Elite, a tool to automatically gen- erate robust wrappers, which breaks down the conver- sion process into three procedures: discovering where the data is located in an HTML page and separating the data into individual objects; decomposing objects into data elements; marking objects and elements in an output format. XWRAP Elite automates the rst two procedures and requires minimal human involve- ment in marking output data. 
In addition, there is a code generation component to package all of the pieces into a stand-alone wrapper.},\n bibtype = {article},\n author = {Han, Wei and Buttler, David and Pu, Calton},\n journal = {Sigmod Record},\n number = {3}\n}","author_short":["Han, W.","Buttler, D.","Pu, C."],"urls":{"Website":"http://dl.acm.org/citation.cfm?id=603867.603873"},"bibbaseid":"han-buttler-pu-wrappingwebdataintoxml-2001","role":"author","downloads":0,"html":""},"bibtype":"article","creationDate":"2020-02-06T23:48:12.219Z","downloads":0,"keywords":[],"search_terms":["wrapping","web","data","xml","han","buttler","pu"],"title":"Wrapping Web Data into XML","year":2001}