% Journal article (MDPI), exported from an institutional-repository handle record.
% The citation key is kept as exported so existing citations keep resolving.
% NOTE(review): journal, volume, number and pages are derived from the DOI suffix
% (ijms + volume 26 + issue 10 + article 4741) -- confirm against the publisher record.
% The repeated keywords fields are merged into one comma-separated field; the funder
% list is kept in the non-printing "funding" field (ignored by bibliography styles).
@article{10481/105012,
  author    = {Suárez-Martín, Ignacio and Risso, Valeria Alejandra and Romero-Zaliz, Rocío and Sánchez Ruiz, José Manuel},
  title     = {Efficient Searches in Protein Sequence Space Through {AI}-Driven Iterative Learning},
  journal   = {International Journal of Molecular Sciences},
  volume    = {26},
  number    = {10},
  pages     = {4741},
  year      = {2025},
  month     = may,
  publisher = {MDPI},
  doi       = {10.3390/ijms26104741},
  url       = {https://hdl.handle.net/10481/105012},
  keywords  = {Enzyme engineering, Viral protein evolution, Focused library screening},
  funding   = {Instituto de Salud Carlos III (IHRC22/00004); Next-Generation EU; MICIU/AEI/10.13039/501100011033 (PID2021-124534OB-100, PID2021-0125017OB-I00); Enia Programs},
  abstract  = {The protein sequence space is vast. This fact, together with the prevalence of epistasis, hampers the engineering of novel enzymes through library screening and is a major obstacle to any attempt to predict natural protein evolution. Recently, specialized methodologies have been used to determine fitness data on ~260,000 sequences for the gene of the enzyme dihydrofolate reductase and antibody affinity data for all combinations of the mutations present in the receptor-binding domain (RBD) of the Omicron strain of SARS-CoV-2 (~30,000 variants). We show that upon iterative training on a total of just a few hundred variants, various state-of-the-art AI tools (multi-layer perceptron, random forest, and XGBoost algorithms) find very high fitness variants of the enzyme and predict the antibody evasion patterns of the RBD. This work provides a basis for efficient, widely applicable, low-throughput experimental approaches to assess viral protein evolution and to engineer enzymes for biotechnological applications.},
}