% Repository record resolved via hdl.handle.net (handle 10481/110902).
% All keywords live in ONE comma-separated `keywords` field: BibTeX keeps only
% the first occurrence of a repeated field name and drops the others.
% `{LLMs}` is brace-protected so sentence-casing styles keep the acronym's case.
@misc{10481/110902,
  author   = {Herrero Solana, Víctor and González-Salmón, Elvira and Robinson García, Nicolás},
  title    = {Inferring Gender from Author Names with Local {LLMs}: A Multi-Model Evaluation},
  year     = {2026},
  url      = {https://hdl.handle.net/10481/110902},
  keywords = {Generative AI, Local Large Language Models, Gender Assignment Algorithms},
  abstract = {Gender identification of researchers is a common practice in scientometric studies examining inequalities in science. The most widely used approach relies on inferring gender from author names using commercial APIs or name-gender dictionaries, which often lack transparency and reproducibility. This study explores the use of local open-weight Large Language Models (LLMs) as an alternative for name-based gender classification. We evaluate 25 models from seven leading families (Llama, Gemma, Phi, Mistral, Qwen, DeepSeek, and Yi), ranging from 270 million to 70 billion parameters, using a reference dataset of nearly 200,000 names across 195 countries extracted from Wikidata. Results show that top-performing models achieve F1-Scores above 0.93 for both gender categories, positioning local LLMs as a viable, cost-effective, and reproducible alternative to proprietary tools. A critical performance threshold emerges at approximately 7 billion parameters, above which all models achieve acceptable results, with diminishing returns beyond 12-14 billion. All models exhibit systematic gender bias, showing higher precision for men and higher recall for women, indicating a tendency to classify ambiguous names as male. Mistral-Nemo-12b emerges as the optimal choice, balancing accuracy, computational efficiency, and gender equity.},
}