% Journal article record exported from an institutional repository (see the handle in url).
% NOTE(review): the journal name is inferred from the DOI segment "j.compeleceng"; confirm it,
% and add volume and article number once verified against the publisher record.
% The funding field is a non-standard field that styles ignore; it keeps the acknowledgement
% text from the export without printing it in the bibliography.
@article{10481/107557,
  author    = {Díaz García, José Ángel and López-Joya, Salvador and Martín Bautista, María José and Ruiz Jiménez, María Dolores},
  title     = {The blueprint of a new fact-checking system: A methodology to enrich {RAG} systems with new generated datasets},
  journal   = {Computers and Electrical Engineering},
  year      = {2025},
  month     = oct,
  publisher = {Pergamon},
  doi       = {10.1016/j.compeleceng.2025.110746},
  url       = {https://hdl.handle.net/10481/107557},
  keywords  = {fact checking, RAG, NLP, language models, datasets},
  abstract  = {In an era where digital misinformation spreads rapidly, Artificial Intelligence (AI) has become a crucial tool for fact-checking. However, the effectiveness of AI in this domain is often limited by the availability of high-quality and scalable datasets to train and guide algorithms. In this paper, we introduce VERIFAID (VERIfication FAISS-based framework for fake news Detection), a novel framework that improves fact-checking through a Retrieval-Augmented Generation (RAG) system based on automatically generated and dynamically growing datasets. Our approach improves evidence retrieval by building a scalable knowledge base, reducing the reliance on manually annotated data. The system consists of three key modules: two dedicated to dataset creation and one inference module that integrates advanced language models, such as LLaMA, within the RAG paradigm. To validate our methodology, we provide technical specifications for both the system and the dataset, together with comprehensive evaluations in zero-shot fact-checking scenarios. The results demonstrate the efficiency and adaptability of our approach and its potential to improve AI-driven fact verification at scale.},
  funding   = {The research reported in this paper was supported by the DesinfoScan project: Grant TED2021-129402B-C21 funded by MICIU/AEI/10.13039/501100011033 and by the European Union NextGenerationEU/PRTR, and FederaMed project: Grant PID2021-123960OB-I00 funded by MICIU/AEI/10.13039/501100011033 and by ERDF/EU. Finally, the research reported in this paper is also funded by the European Union (BAG-INTEL project, grant agreement no. 101121309).},
}