Prediction of Population Health Indices from Social Media using Kernel-based Textual and Temporal Features. Nguyen, T., Nguyen, D. T., Larsen, M. E., O'Dea, B., Yearwood, J., Phung, D., Venkatesh, S., & Christensen, H. In Proceedings of the 26th International Conference on World Wide Web Companion, WWW '17 Companion, pages 99–107, Republic and Canton of Geneva, CHE, April 2017. International World Wide Web Conferences Steering Committee. Paper doi abstract bibtex From 1984, the US has annually conducted the Behavioral Risk Factor Surveillance System (BRFSS) surveys to capture either health behaviors, such as drinking or smoking, or health outcomes, including mental, physical, and generic health, of the population. Although this kind of information at a population level, such as US counties, is important for local governments to identify local needs, traditional datasets may take years to collate and to become publicly available. Geocoded social media data can provide an alternative reflection of local health trends. In this work, to predict the percentage of adults in a county reporting "insufficient sleep", a health behavior, and, at the same time, their health outcomes, novel textual and temporal features are proposed. The proposed textual features are defined at mid-level and can be applied on top of various low-level textual features. They are computed via kernel functions on underlying features and encode the relationships between individual underlying features over a population. To further enrich the predictive ability of the health indices, the textual features are augmented with temporal information. We evaluated the proposed features and compared them with existing features using a dataset collected from the BRFSS. 
Experimental results show that the combination of kernel-based textual features and temporal information predict well both the health behavior (with best performance at rho=0.82) and health outcomes (with best performance at rho=0.78), demonstrating the capability of social media data in prediction of population health indices. The results also show that our proposed features gained higher correlation coefficients than did the existing ones, increasing the correlation coefficient by up to 0.16, suggesting the potential of the approach in a wide spectrum of applications on data analytics at population levels.
@comment{Conference paper in the WWW 2017 Companion proceedings. Title words are left unbraced so the bibliography style controls casing; only the acronym WWW and the proper noun World Wide Web are brace-protected.}
@inproceedings{nguyen_prediction_2017,
  author    = {Nguyen, Thin and Nguyen, Duc Thanh and Larsen, Mark E. and O'Dea, Bridianne and Yearwood, John and Phung, Dinh and Venkatesh, Svetha and Christensen, Helen},
  title     = {Prediction of Population Health Indices from Social Media using Kernel-based Textual and Temporal Features},
  booktitle = {Proceedings of the 26th International Conference on {World Wide Web} Companion},
  series    = {{WWW} '17 Companion},
  pages     = {99--107},
  publisher = {International World Wide Web Conferences Steering Committee},
  address   = {Republic and Canton of Geneva, CHE},
  month     = apr,
  year      = {2017},
  isbn      = {978-1-4503-4914-7},
  doi       = {10.1145/3041021.3054136},
  url       = {https://doi.org/10.1145/3041021.3054136},
  urldate   = {2020-11-17},
  abstract  = {From 1984, the US has annually conducted the Behavioral Risk Factor Surveillance System (BRFSS) surveys to capture either health behaviors, such as drinking or smoking, or health outcomes, including mental, physical, and generic health, of the population. Although this kind of information at a population level, such as US counties, is important for local governments to identify local needs, traditional datasets may take years to collate and to become publicly available. Geocoded social media data can provide an alternative reflection of local health trends. In this work, to predict the percentage of adults in a county reporting ``insufficient sleep'', a health behavior, and, at the same time, their health outcomes, novel textual and temporal features are proposed. The proposed textual features are defined at mid-level and can be applied on top of various low-level textual features. They are computed via kernel functions on underlying features and encode the relationships between individual underlying features over a population. To further enrich the predictive ability of the health indices, the textual features are augmented with temporal information. We evaluated the proposed features and compared them with existing features using a dataset collected from the BRFSS. Experimental results show that the combination of kernel-based textual features and temporal information predict well both the health behavior (with best performance at rho=0.82) and health outcomes (with best performance at rho=0.78), demonstrating the capability of social media data in prediction of population health indices. The results also show that our proposed features gained higher correlation coefficients than did the existing ones, increasing the correlation coefficient by up to 0.16, suggesting the potential of the approach in a wide spectrum of applications on data analytics at population levels.},
}
Downloads: 0
{"_id":"BM8GEs7q5jDfRLd4Q","bibbaseid":"nguyen-nguyen-larsen-odea-yearwood-phung-venkatesh-christensen-predictionofpopulationhealthindicesfromsocialmediausingkernelbasedtextualandtemporalfeatures-2017","downloads":0,"creationDate":"2018-02-02T05:27:24.429Z","title":"Prediction of Population Health Indices from Social Media using Kernel-based Textual and Temporal Features","author_short":["Nguyen, T.","Nguyen, D. T.","Larsen, M. E.","O'Dea, B.","Yearwood, J.","Phung, D.","Venkatesh, S.","Christensen, H."],"year":2017,"bibtype":"inproceedings","biburl":"https://bibbase.org/zotero/wybert","bibdata":{"bibtype":"inproceedings","type":"inproceedings","address":"Republic and Canton of Geneva, CHE","series":"WWW '17 Companion","title":"Prediction of Population Health Indices from Social Media using Kernel-based Textual and Temporal Features","isbn":"978-1-4503-4914-7","url":"https://doi.org/10.1145/3041021.3054136","doi":"10.1145/3041021.3054136","abstract":"From 1984, the US has annually conducted the Behavioral Risk Factor Surveillance System (BRFSS) surveys to capture either health behaviors, such as drinking or smoking, or health outcomes, including mental, physical, and generic health, of the population. Although this kind of information at a population level, such as US counties, is important for local governments to identify local needs, traditional datasets may take years to collate and to become publicly available. Geocoded social media data can provide an alternative reflection of local health trends. In this work, to predict the percentage of adults in a county reporting \"insufficient sleep\", a health behavior, and, at the same time, their health outcomes, novel textual and temporal features are proposed. The proposed textual features are defined at mid-level and can be applied on top of various low-level textual features. 
They are computed via kernel functions on underlying features and encode the relationships between individual underlying features over a population. To further enrich the predictive ability of the health indices, the textual features are augmented with temporal information. We evaluated the proposed features and compared them with existing features using a dataset collected from the BRFSS. Experimental results show that the combination of kernel-based textual features and temporal information predict well both the health behavior (with best performance at rho=0.82) and health outcomes (with best performance at rho=0.78), demonstrating the capability of social media data in prediction of population health indices. The results also show that our proposed features gained higher correlation coefficients than did the existing ones, increasing the correlation coefficient by up to 0.16, suggesting the potential of the approach in a wide spectrum of applications on data analytics at population levels.","urldate":"2020-11-17","booktitle":"Proceedings of the 26th International Conference on World Wide Web Companion","publisher":"International World Wide Web Conferences Steering Committee","author":[{"propositions":[],"lastnames":["Nguyen"],"firstnames":["Thin"],"suffixes":[]},{"propositions":[],"lastnames":["Nguyen"],"firstnames":["Duc","Thanh"],"suffixes":[]},{"propositions":[],"lastnames":["Larsen"],"firstnames":["Mark","E."],"suffixes":[]},{"propositions":[],"lastnames":["O'Dea"],"firstnames":["Bridianne"],"suffixes":[]},{"propositions":[],"lastnames":["Yearwood"],"firstnames":["John"],"suffixes":[]},{"propositions":[],"lastnames":["Phung"],"firstnames":["Dinh"],"suffixes":[]},{"propositions":[],"lastnames":["Venkatesh"],"firstnames":["Svetha"],"suffixes":[]},{"propositions":[],"lastnames":["Christensen"],"firstnames":["Helen"],"suffixes":[]}],"month":"April","year":"2017","pages":"99–107","bibtex":"@inproceedings{nguyen_prediction_2017,\n\taddress = {Republic and Canton 
of Geneva, CHE},\n\tseries = {{WWW} '17 {Companion}},\n\ttitle = {Prediction of {Population} {Health} {Indices} from {Social} {Media} using {Kernel}-based {Textual} and {Temporal} {Features}},\n\tisbn = {978-1-4503-4914-7},\n\turl = {https://doi.org/10.1145/3041021.3054136},\n\tdoi = {10.1145/3041021.3054136},\n\tabstract = {From 1984, the US has annually conducted the Behavioral Risk Factor Surveillance System (BRFSS) surveys to capture either health behaviors, such as drinking or smoking, or health outcomes, including mental, physical, and generic health, of the population. Although this kind of information at a population level, such as US counties, is important for local governments to identify local needs, traditional datasets may take years to collate and to become publicly available. Geocoded social media data can provide an alternative reflection of local health trends. In this work, to predict the percentage of adults in a county reporting \"insufficient sleep\", a health behavior, and, at the same time, their health outcomes, novel textual and temporal features are proposed. The proposed textual features are defined at mid-level and can be applied on top of various low-level textual features. They are computed via kernel functions on underlying features and encode the relationships between individual underlying features over a population. To further enrich the predictive ability of the health indices, the textual features are augmented with temporal information. We evaluated the proposed features and compared them with existing features using a dataset collected from the BRFSS. Experimental results show that the combination of kernel-based textual features and temporal information predict well both the health behavior (with best performance at rho=0.82) and health outcomes (with best performance at rho=0.78), demonstrating the capability of social media data in prediction of population health indices. 
The results also show that our proposed features gained higher correlation coefficients than did the existing ones, increasing the correlation coefficient by up to 0.16, suggesting the potential of the approach in a wide spectrum of applications on data analytics at population levels.},\n\turldate = {2020-11-17},\n\tbooktitle = {Proceedings of the 26th {International} {Conference} on {World} {Wide} {Web} {Companion}},\n\tpublisher = {International World Wide Web Conferences Steering Committee},\n\tauthor = {Nguyen, Thin and Nguyen, Duc Thanh and Larsen, Mark E. and O'Dea, Bridianne and Yearwood, John and Phung, Dinh and Venkatesh, Svetha and Christensen, Helen},\n\tmonth = apr,\n\tyear = {2017},\n\tpages = {99--107},\n}\n\n","author_short":["Nguyen, T.","Nguyen, D. T.","Larsen, M. E.","O'Dea, B.","Yearwood, J.","Phung, D.","Venkatesh, S.","Christensen, H."],"key":"nguyen_prediction_2017","id":"nguyen_prediction_2017","bibbaseid":"nguyen-nguyen-larsen-odea-yearwood-phung-venkatesh-christensen-predictionofpopulationhealthindicesfromsocialmediausingkernelbasedtextualandtemporalfeatures-2017","role":"author","urls":{"Paper":"https://doi.org/10.1145/3041021.3054136"},"metadata":{"authorlinks":{}},"html":""},"search_terms":["prediction","population","health","indices","social","media","using","kernel","based","textual","temporal","features","nguyen","nguyen","larsen","o'dea","yearwood","phung","venkatesh","christensen"],"keywords":[],"authorIDs":[],"dataSources":["CdiCnWQBRGXM88A9h","TJkbwzD8s2wCxBy6Y"]}