@comment{
  Repository record (handle 10481/93666) for the paper "Interpretable Feature
  Learning in Multivariate Big Data Analysis for Network Monitoring".
  - The keywords are stored as one comma-separated list; repeating the field
    name inside a single entry makes BibTeX discard all but one occurrence.
  - NOTE(review): the DOI and the IEEE publisher suggest this is a journal
    article; if so, the article entry type with journal, volume and pages
    would fit better than misc. Confirm against the publisher record.
  - NOTE(review): the organization field holds funding acknowledgements as
    exported by the repository; standard styles do not print it for misc.
}
@misc{10481/93666,
  author       = {Camacho Páez, José and Wasielewska, Katarzyna and Bro, Rasmus and Kotz, David},
  title        = {Interpretable Feature Learning in {Multivariate Big Data Analysis} for Network Monitoring},
  publisher    = {IEEE},
  year         = {2024},
  doi          = {10.1109/TNSM.2024.3368501},
  url          = {https://hdl.handle.net/10481/93666},
  keywords     = {Data models, Analytical models, Monitoring, Big Data, Representation learning, Principal component analysis, Data visualization},
  organization = {10.13039/100000001-US National Science Foundation (Grant Number: 0454062) Agencia Estatal de Investigación in Spain (Grant Number: PID2020-113462RBI00) 10.13039/100010665-European Union’s Horizon 2020 research and innovation programme under the Marie Skłodowska-Curie (Grant Number: 893146) Universidad de Granada/CBUA},
  abstract     = {There is an increasing interest in the development of new data-driven models useful to assess the performance of communication networks. For many applications, like network monitoring and troubleshooting, a data model is of little use if it cannot be interpreted by a human operator. In this paper, we present an extension of the Multivariate Big Data Analysis (MBDA) methodology, a recently proposed interpretable data analysis tool. In this extension, we propose a solution to the automatic derivation of features, a cornerstone step for the application of MBDA when the amount of data is massive. The resulting network monitoring approach allows us to detect and diagnose disparate network anomalies, with a data-analysis workflow that combines the advantages of interpretable and interactive models with the power of parallel processing. We apply the extended MBDA to two case studies: UGR’16, a benchmark flow-based real-traffic dataset for anomaly detection, and Dartmouth’18, the longest and largest Wi-Fi trace known to date.},
}