% Journal paper (the DOI prefix 10.1109/TNSM points to an IEEE journal); the
% handle.net URL is the institutional-repository copy of the same work.
% Notes for maintainers:
%   - The citation key is kept as exported so existing citations still resolve.
%   - Volume, issue and page numbers were not part of the export; add them
%     once verified against the publisher record.
%   - "funding" is a non-standard field that styles ignore; it preserves the
%     funder list that the export had placed in "organization".
%   - Accented letters use brace-wrapped LaTeX escapes so the entry sorts and
%     labels correctly under classic BibTeX as well as Biber.
@article{10481/93666,
  author    = {Camacho P{\'a}ez, Jos{\'e} and Wasielewska, Katarzyna and Bro, Rasmus and Kotz, David},
  title     = {Interpretable Feature Learning in {Multivariate Big Data Analysis} for Network Monitoring},
  journal   = {IEEE Transactions on Network and Service Management},
  publisher = {IEEE},
  year      = {2024},
  doi       = {10.1109/TNSM.2024.3368501},
  url       = {https://hdl.handle.net/10481/93666},
  keywords  = {Data models, Analytical models, Monitoring, Big Data, Representation learning, Principal component analysis, Data visualization},
  abstract  = {There is an increasing interest in the development of new data-driven models useful to assess the performance of communication networks. For many applications, like network monitoring and troubleshooting, a data model is of little use if it cannot be interpreted by a human operator. In this paper, we present an extension of the Multivariate Big Data Analysis (MBDA) methodology, a recently proposed interpretable data analysis tool. In this extension, we propose a solution to the automatic derivation of features, a cornerstone step for the application of MBDA when the amount of data is massive. The resulting network monitoring approach allows us to detect and diagnose disparate network anomalies, with a data-analysis workflow that combines the advantages of interpretable and interactive models with the power of parallel processing. We apply the extended MBDA to two case studies: UGR'16, a benchmark flow-based real-traffic dataset for anomaly detection, and Dartmouth'18, the longest and largest Wi-Fi trace known to date.},
  funding   = {10.13039/100000001-US National Science Foundation (Grant Number: 0454062); Agencia Estatal de Investigaci{\'o}n in Spain (Grant Number: PID2020-113462RBI00); 10.13039/100010665-European Union's Horizon 2020 research and innovation programme under the Marie Sk{\l}odowska-Curie (Grant Number: 893146); Universidad de Granada/CBUA},
}