% Thesis record for handle 10481/108605 (Universidad de Granada).
% - Typed as a doctoral thesis: the exported record was a generic misc entry
%   whose first "organization" value read "Tesis Univ. Granada." and whose
%   funding text mentions a pre-doctoral fellowship. NOTE(review): confirm the
%   degree level against the repository page.
% - The export repeated the abstract, organization and keywords field names;
%   BibTeX keeps only one value per field name, so the values are merged here.
% - "abstract-es" and "funding" are non-standard field names that standard
%   styles ignore; they hold the Spanish abstract and the funding
%   acknowledgements that the export had stored under "organization".
@phdthesis{10481/108605,
  author      = {Morales Garzón, Andrea},
  title       = {Deep learning and natural language processing in heterogeneous sources of massive data},
  school      = {Universidad de Granada},
  publisher   = {Universidad de Granada},
  year        = {2025},
  url         = {https://hdl.handle.net/10481/108605},
  keywords    = {Aprendizaje profundo, Procesamiento del lenguaje natural, Grafos de conocimiento, Deep learning, Natural language processing, Knowledge graphs},
  abstract    = {Although recent advances in language models and multimodal representations enable improved semantic understanding and inference, the overall effectiveness of constrained applications remains limited by distributed and non-homogeneous data from heterogeneous data sources. This thesis investigates the challenges and opportunities in developing natural language processing (NLP) models for domain-specific applications involving heterogeneous data sources and user constraints. The thesis demonstrates that semantic relationships learned through language models can be successfully applied to tasks such as personalised content adaptation. Additionally, the integration of knowledge graphs and linguistic variables is shown to enhance understandability and user-centred recommendation in personalised environments.},
  abstract-es = {Aunque los recientes avances en modelos lingüísticos y representaciones multimodales permiten mejorar la comprensión y la inferencia desde el punto de vista semántico, la eficacia general de las aplicaciones con restricciones sigue estando limitada por los datos distribuidos y no homogeneizados, normalmente procedentes de fuentes de datos heterogéneas. Esta tesis se centra en los retos y oportunidades en el desarrollo de modelos de procesamiento del lenguaje natural para aplicaciones de dominio específico que implican el manejo de fuentes de datos heterogéneas y restricciones de usuario. La tesis demuestra que las relaciones semánticas aprendidas mediante modelos lingüísticos pueden aplicarse con éxito a tareas como la adaptación personalizada de contenidos. Además, se demuestra que la integración de grafos de conocimiento y variables lingüísticas mejora la interpretabilidad por parte del usuario y la personalización en el usuario en entornos personalizados.},
  funding     = {Department for Economic Transformation, Industry, Knowledge and Universities of the Andalusian Regional Government through a pre-doctoral fellowship program (Grant Ref. PREDOC_00298); Partially supported by the Grant PID2021-123960OB-I00 funded by MCIN/AEI/10.13039/501100011033; ERDF A way of making Europe; Project BAG-INTEL (Ref. 101121309) funded by the European Commission; Project CITIC-2024-06, funded by the Research Center for Information and Communication technologies of the University of Granada},
}