Automated evaluation of consistency within the PubChem Compound database. Dashti, H., Wedell, J. R., Westler, W. M., Markley, J. L., & Eghbalnia, H. R. Scientific data, 6:190023, February, 2019. doi:10.1038/sdata.2019.23. Abstract: Identification of discrepant data in aggregated databases is a key step in data curation and remediation. We have applied the ALATIS approach, which is based on the international chemical shift identifier (InChI) model, to the full PubChem Compound database to generate unique and reproducible compound and atom identifiers for all entries for which three-dimensional structures were available. This exercise also served to identify entries with discrepancies between structures and chemical formulas or InChI strings. The use of unique compound identifiers and atom nomenclature should support more rigorous links between small-molecule databases including those containing atom-specific information of the type available from crystallography and spectroscopy. The comprehensive results from this analysis are publicly available through our webserver [http://alatis.nmrfam.wisc.edu/].
@article{dashti_automated_2019,
title = {Automated evaluation of consistency within the {PubChem} {Compound} database.},
volume = {6},
issn = {2052-4463},
doi = {10.1038/sdata.2019.23},
abstract = {Identification of discrepant data in aggregated databases is a key step in data curation and remediation. We have applied the ALATIS approach, which is based on the international chemical shift identifier (InChI) model, to the full PubChem Compound database to generate unique and reproducible compound and atom identifiers for all entries for which three-dimensional structures were available. This exercise also served to identify entries with discrepancies between structures and chemical formulas or InChI strings. The use of unique compound identifiers and atom nomenclature should support more rigorous links between small-molecule databases including those containing atom-specific information of the type available from crystallography and spectroscopy. The comprehensive results from this analysis are publicly available through our webserver [http://alatis.nmrfam.wisc.edu/].},
language = {eng},
journal = {Scientific data},
author = {Dashti, Hesam and Wedell, Jonathan R. and Westler, William M. and Markley, John L. and Eghbalnia, Hamid R.},
month = feb,
year = {2019},
pmid = {30778259},
pmcid = {PMC6380220},
keywords = {InChI Consistency},
pages = {190023},
}
Downloads: 0
{"_id":"vPviW4jvFACujG4yM","bibbaseid":"dashti-wedell-westler-markley-eghbalnia-automatedevaluationofconsistencywithinthepubchemcompounddatabase-2019","author_short":["Dashti, H.","Wedell, J. R.","Westler, W. M.","Markley, J. L.","Eghbalnia, H. R."],"bibdata":{"bibtype":"article","type":"article","title":"Automated evaluation of consistency within the PubChem Compound database.","volume":"6","issn":"2052-4463","doi":"10.1038/sdata.2019.23","abstract":"Identification of discrepant data in aggregated databases is a key step in data curation and remediation. We have applied the ALATIS approach, which is based on the international chemical shift identifier (InChI) model, to the full PubChem Compound database to generate unique and reproducible compound and atom identifiers for all entries for which three-dimensional structures were available. This exercise also served to identify entries with discrepancies between structures and chemical formulas or InChI strings. The use of unique compound identifiers and atom nomenclature should support more rigorous links between small-molecule databases including those containing atom-specific information of the type available from crystallography and spectroscopy. The comprehensive results from this analysis are publicly available through our webserver [http://alatis.nmrfam.wisc.edu/].","language":"eng","journal":"Scientific data","author":[{"propositions":[],"lastnames":["Dashti"],"firstnames":["Hesam"],"suffixes":[]},{"propositions":[],"lastnames":["Wedell"],"firstnames":["Jonathan","R."],"suffixes":[]},{"propositions":[],"lastnames":["Westler"],"firstnames":["William","M."],"suffixes":[]},{"propositions":[],"lastnames":["Markley"],"firstnames":["John","L."],"suffixes":[]},{"propositions":[],"lastnames":["Eghbalnia"],"firstnames":["Hamid","R."],"suffixes":[]}],"month":"February","year":"2019","pmid":"30778259","pmcid":"PMC6380220","keywords":"InChI Consistency","pages":"190023","bibtex":"@article{dashti_automated_2019,\n\ttitle = {Automated evaluation of consistency within the {PubChem} {Compound} database.},\n\tvolume = {6},\n\tissn = {2052-4463},\n\tdoi = {10.1038/sdata.2019.23},\n\tabstract = {Identification of discrepant data in aggregated databases is a key step in data curation and remediation. We have applied the ALATIS approach, which is based on the international chemical shift identifier (InChI) model, to the full PubChem Compound database to generate unique and reproducible compound and atom identifiers for all entries for which three-dimensional structures were available. This exercise also served to identify entries with discrepancies between structures and chemical formulas or InChI strings. The use of unique compound identifiers and atom nomenclature should support more rigorous links between small-molecule databases including those containing atom-specific information of the type available from crystallography and spectroscopy. The comprehensive results from this analysis are publicly available through our webserver [http://alatis.nmrfam.wisc.edu/].},\n\tlanguage = {eng},\n\tjournal = {Scientific data},\n\tauthor = {Dashti, Hesam and Wedell, Jonathan R. and Westler, William M. and Markley, John L. and Eghbalnia, Hamid R.},\n\tmonth = feb,\n\tyear = {2019},\n\tpmid = {30778259},\n\tpmcid = {PMC6380220},\n\tkeywords = {InChI Consistency},\n\tpages = {190023},\n}\n\n","author_short":["Dashti, H.","Wedell, J. R.","Westler, W. M.","Markley, J. L.","Eghbalnia, H. R."],"key":"dashti_automated_2019","id":"dashti_automated_2019","bibbaseid":"dashti-wedell-westler-markley-eghbalnia-automatedevaluationofconsistencywithinthepubchemcompounddatabase-2019","role":"author","urls":{},"keyword":["InChI Consistency"],"metadata":{"authorlinks":{}}},"bibtype":"article","biburl":"https://api.zotero.org/users/4889195/collections/HS6MEV75/items?key=wohGlmhm95vydCMTcg4vFyPc&format=bibtex&limit=100","dataSources":["gz8y2pHfHWTWED42i","CT2QdQoQtu2wab8TD","K33yh3sL9wWE5fZcR","ho4BHrhBYi8mmNGK3","JGNRjejujgZaT5Woe","Rg7mRMhmZYnCLLEgE","2jPTcmjEXPKsta6A2","M95Ltr7FcMNgDrNnS","v9EfXaiy2Ff4odiJy","aQjY3RHAbbYYbJAHQ","NeGEqc3yeSTRFeF3b","WpnnivZJDcjEhWLni","sfXcPh2bSbvLi6Z3M","9L7f7koQxfA89CiT8","ErQXD9DjCg98AZjA5"],"keywords":["inchi consistency"],"search_terms":["automated","evaluation","consistency","within","pubchem","compound","database","dashti","wedell","westler","markley","eghbalnia"],"title":"Automated evaluation of consistency within the PubChem Compound database.","year":2019}