@comment{IberSPEECH 2022 conference paper; record exported from the repository handle 10481/80607.
Typed as inproceedings because the DOI and the original publisher string both name IberSPEECH 2022.
NOTE(review): confirm the exact proceedings title for booktitle and add pages if known.
The funding acknowledgement is kept in the non-standard funding field so that styles do not print it as a sponsoring organization.}
@inproceedings{10481/80607,
  author    = {García Ruíz, Celia and Martín Doñas, Juan M. and Gómez García, Ángel Manuel},
  title     = {The role of window length and shift in complex-domain {DNN}-based speech enhancement},
  booktitle = {{IberSPEECH} 2022},
  publisher = {ISCA},
  year      = {2022},
  month     = nov,
  doi       = {10.21437/IberSPEECH.2022-30},
  url       = {https://hdl.handle.net/10481/80607},
  keywords  = {Speech enhancement, Deep neural network, Short Time Fourier Transform, Complex spectral masking},
  abstract  = {Deep learning techniques have widely been applied to speech enhancement as they show outstanding modeling capabilities that are needed for proper speech-noise separation. In contrast to other end-to-end approaches, masking-based methods consider speech spectra as input to the deep neural network, providing spectral masks for noise removal or attenuation. In these approaches, the Short-Time Fourier Transform (STFT) and, particularly, the parameters used for the analysis/synthesis window, plays an important role which is often neglected. In this paper, we analyze the effects of window length and shift on a complex-domain convolutional-recurrent neural network (DCCRN) which is able to provide, separately, magnitude and phase corrections. Different perceptual quality and intelligibility objective metrics are used to assess its performance. As a result, we have observed that phase corrections have an increased impact with shorter window sizes. Similarly, as window overlap increases, phase takes more relevance than magnitude spectrum in speech enhancement.},
  funding   = {Project PID2019-104206GB-I00 funded by MCIN/AEI/10.13039/501100011033.},
}