Author gender metadata augmentation of HathiTrust digital library. Peng, Z., Chen, M., Kowalczyk, S., & Plale, B. Proceedings of the ASIST Annual Meeting, 2014.
doi  abstract   bibtex   
Bibliographic metadata is essential for digital library resource description. Especially as the size and number of bibliographic entities grows, high-quality metadata enables richer forms of digital library access, search, and use. Metadata records can be enriched through automated techniques. For example, a digital humanities scholar might use the gender of a set of authors during their literature analysis. In this study, we undertook to enrich the metadata description of a large-scale digital library, the HathiTrust (HT) digital library, specifically by determining the gender of authors of the public domain portion of the collection. The results are stored to a separate Solr index accessible through the HathiTrust Research Center services. This study, which successfully resolved in 78.9% of the cases the gender of authors in the HT public domain corpus, suggests future research directions in capturing and representing the provenance of the contributing sources to enhance trust, and in machine learning to resolve the remaining names.
@comment{Reference-manager export. The entry key is taken from the citation_key field. Fields such as id, created, profile_id, read, starred and folder_uuids are reference-manager bookkeeping that standard styles ignore.}
@article{Peng2014,
 title = {Author gender metadata augmentation of {HathiTrust} digital library},
 type = {article},
 year = {2014},
 volume = {51},
 id = {1dc768c3-bf0e-33cf-9181-3bc4eed6dda6},
 created = {2019-10-01T17:20:59.205Z},
 file_attached = {false},
 profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
 last_modified = {2019-10-01T17:23:55.301Z},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {true},
 hidden = {false},
 citation_key = {Peng2014},
 folder_uuids = {73f994b4-a3be-4035-a6dd-3802077ce863},
 private_publication = {false},
 abstract = {Bibliographic metadata is essential for digital library resource description. Especially as the size and number of bibliographic entities grows, high-quality metadata enables richer forms of digital library access, search, and use. Metadata records can be enriched through automated techniques. For example, a digital humanities scholar might use the gender of a set of authors during their literature analysis. In this study, we undertook to enrich the metadata description of a large-scale digital library, the HathiTrust (HT) digital library, specifically by determining the gender of authors of the public domain portion of the collection. The results are stored to a separate Solr index accessible through the HathiTrust Research Center services. This study, which successfully resolved in 78.9\% of the cases the gender of authors in the HT public domain corpus, suggests future research directions in capturing and representing the provenance of the contributing sources to enhance trust, and in machine learning to resolve the remaining names.},
 bibtype = {article},
 author = {Peng, Z. and Chen, M. and Kowalczyk, S. and Plale, B.},
 doi = {10.1002/meet.2014.14505101098},
 journal = {Proceedings of the ASIST Annual Meeting},
 number = {1}
}

Downloads: 0