% Thesis record exported from the hdl.handle.net repository (handle 10481/108583).
% Changes relative to the raw export:
%   - The export repeated the organization field seven times; a field may occur
%     only once per entry, so the first value is kept in organization and the
%     six funding statements are merged, separated by semicolons, into the
%     non-standard funding field (ignored by standard styles, kept as data).
%   - Ampersands in the funding text are escaped for LaTeX.
%   - Non-ASCII letters and curly quotes are written as LaTeX escapes so the
%     entry works with both 8-bit BibTeX and Biber.
%   - Entry type is phdthesis with a school field instead of misc.
%     NOTE(review): the record only says "Tesis Univ. Granada." - confirm it is
%     a doctoral thesis; use mastersthesis otherwise.
%   - The abstract text is kept as published.
@phdthesis{10481/108583,
  author       = {Bola{\~n}os Martinez, Daniel},
  title        = {Tourism management in smart villages: development of a methodology with sensors and machine learning},
  school       = {Universidad de Granada},
  publisher    = {Universidad de Granada},
  year         = {2025},
  url          = {https://hdl.handle.net/10481/108583},
  organization = {Tesis Univ. Granada.},
  funding      = {Part of the R\&D\&i Project Ref. PID2019-109644RB-I00 funded by Ministerio de Ciencia e Innovaci{\'o}n/ Agencia Estatal de Investigaci{\'o}n/ 10.13039/501100011033; R\&D\&i Project Ref. C-SEJ-128-UGR23 funded by Junta de Andaluc{\'i}a and ``ERDF A way of making Europe''; Project ``Thematic Center on Mountain Ecosystem \& Remote sensing, Deep learning-AI e-Services University of Granada-Sierra Nevada'' (LifeWatch-2019-10-UGR-01), which has been co-funded by the Ministry of Science and Innovation through the FEDER funds from the Spanish 98 Pluriregional Operational Program 2014-2020 (POPE), LifeWatch-ERIC action line; Co-financed by the Provincial Council of Granada; Funding for open access charge: Universidad de Granada/CBUA; Part of the grant PID2023-149185OB-I00 funded by MICIU/ AEI/10.13039/501100011033 and by ERDF/EU},
  abstract     = {Machine learning (ML) uses algorithms to analyze data features and identify patterns. Deep learning (DL) is a subset of ML that uses neural networks (NN) to analyze complex relationships, often outperforming traditional models. In the DL field, transformer-based architectures introduce attention mechanisms to improve model performance. Transformers excel in time-series forecasting, and image processing. A few works have adapted transformers for tabular data, but remain ineffective for small datasets. ML and DL models are implemented through ML pipelines, consisting of different steps: data collection, validation, and preprocessing; followed by model training, tuning, evaluation, and visualization; and ending with the model deployment. The validation and preprocessing steps include feature selection and normalization, respectively. Feature selection determines the most influential features, while normalization ensures consistency in data distribution. Key challenges identified in this thesis include: (1) The lack of transformers adapted for small tabular datasets, which is important in contexts with limited data collection, such as questionnaires. (2) Within the data validation step, feature selection in ML pipelines is often focused on individual features rather than entire datasets, making resource allocation decisions difficult. (3) Within the data preprocessing step, normalization methods are applied without proper assessment, despite their impact on model accuracy and explainability. This work presents an ML pipeline that integrates data from various sources and fuses them. During the data validation step, we conduct an ablation study at the dataset level to assess the influence of each data source on the tested models, thereby addressing the challenge (2). In the data preprocessing step, we apply different normalization methods to analyze their impact on model performance, addressing challenge (3). Finally, we develop a transformer-based model with multiple attention layers specifically designed for limited data and integrate it during the model training/ tuning step to tackle the challenge (1). Additionally, we evaluate other models for comparison and draw conclusions in the evaluation and visualization step. The study implements the pipeline in a case study on smart villages, an adaptation of the smart city concept to rural communities. While smart cities have been the subject of numerous studies, smart villages are an emerging concept that has attracted attention but remains underexplored. The literature highlights that solutions designed for large cities may not be directly applicable to small villages. One significant difference between smart cities and smart villages is their population size and infrastructure. Unlike cities, where data are more easily collected through both automated systems and manual records, villages typically produce much smaller datasets. This limitation makes it challenging to apply DL models effectively, as they require vast amounts of data to achieve reliable performance. The results of this thesis provide insights into ML pipelines for vehicle mobility in smart villages. We deploy IoT devices, including LPR cameras, to collect vehicle behavior and contextual data such as holiday calendars, socio-economic factors, and visitor demographics. Unlike prior studies, we integrate LPR data with contextual information to improve clustering analysis. We evaluate normalization methods and their impact on cluster interpretation, identifying behavioral patterns that differentiate residents from visitors. After identifying vehicle patterns, we propose supervised classification tasks to predict different vehicles' behaviors. For example, we propose a model that predicts how many nights a visitor spends in the area. Using this idea as a basis, we propose an ablation study at the dataset level, evaluating the level of improvement of the resulting models. Unlike conventional ablation studies, which focus on assessing the contribution of NN layers, our approach analyses the impact of datasets composed of different features from a common information source. Finally, to address data scarcity, we develop a transformer that combines vehicle data with visitor questionnaires, predicting repeat tourist visits. This proposal guides researchers in selecting validation and preprocessing techniques, such as feature selection and normalization. It advances research by applying transformers to small tabular datasets, improving predictive models in data-limited scenarios. It also helps stakeholders in smart villages analyze mobility patterns. Normalization reveals distinct visitor clusters, providing insights for strategies to promote overnight stays and encourage non-registered residents to register. An ablation study identifies socio-economic status and entry points as key predictors of visitor overnights, optimizing data use in tourism forecasting. Finally, our transformer model, which tracks repeat tourists, could aid urban planning and transport policies.},
}