% Repository record resolved via http://hdl.handle.net/10481/68515; the DOI below identifies the same work.
@misc{10481/68515,
  author       = {Ferri García, Ramón and Castro-Martín, Luis and Rueda García, María Del Mar},
  title        = {Evaluating Machine Learning methods for estimation in online surveys with superpopulation modeling},
  year         = {2021},
  month        = aug,
  doi          = {10.1016/j.matcom.2020.03.005},
  url          = {http://hdl.handle.net/10481/68515},
  organization = {{Ministerio de Economía y Competitividad, Spain} and {Ministerio de Ciencia, Innovación y Universidades, Spain}},
  keywords     = {Superpopulation modeling, Machine Learning, Online surveys, Simulation},
  abstract     = {Online surveys, despite their cost and effort advantages, are particularly prone to selection bias due to the differences between target population and potentially covered population (online population). This leads to the unreliability of estimates coming from online samples unless further adjustments are applied. Some techniques have arisen in the last years regarding this issue, among which superpopulation modeling can be useful in Big Data context where censuses are accessible. This technique uses the sample to train a model capturing the behavior of a target variable which is to be estimated, and applies it to the nonsampled individuals to obtain population-level estimates. The modeling step has been usually done with linear regression or LASSO models, but machine learning (ML) algorithms have been pointed out as promising alternatives. In this study we examine the use of these algorithms in the online survey context, in order to evaluate and compare their performance and adequacy to the problem. A simulation study shows that ML algorithms can effectively reduce volunteering bias to a greater extent than traditional methods in several scenarios.},
}