% Workshop paper: 4th Workshop on Machine Learning for Cybersecurity (MLCS),
% held at ECML-PKDD 2022. The url field points at the hdl.handle.net record.
% "funding" is a non-standard field: bibliography styles ignore it, so the
% acknowledgement text is kept as metadata without being printed.
% Accented characters use braced LaTeX escapes so the entry sorts and renders
% correctly under classic BibTeX as well as Biber.
@inproceedings{10481/81204,
  author    = {Wasielewska, Katarzyna and Soukup, Dominik and Cejka, Tomas and Camacho P{\'a}ez, Jos{\'e}},
  title     = {Evaluation of the Limit of Detection in Network Dataset Quality Assessment with {PerQoDA}},
  booktitle = {European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, {ECML-PKDD} 2022, 4th Workshop on Machine Learning for Cybersecurity ({MLCS})},
  year      = {2022},
  url       = {https://hdl.handle.net/10481/81204},
  keywords  = {Dataset quality assessment, Permutation testing, Network dataset, Network security, Attack detection, Machine learning, Classification},
  abstract  = {Machine learning is recognised as a relevant approach to detect attacks and other anomalies in network traffic. However, there are still no suitable network datasets that would enable effective detection. On the other hand, the preparation of a network dataset is not easy due to privacy reasons but also due to the lack of tools for assessing their quality. In a previous paper, we proposed a new method for data quality assessment based on permutation testing. This paper presents a parallel study on the limits of detection of such an approach. We focus on the problem of network flow classification and use well-known machine learning techniques. The experiments were performed using publicly available network datasets.},
  funding   = {This work is partially funded by the European Union's Horizon 2020 research, innovation programme under the Marie Sk{\l}odowska-Curie grant agreement No 893146, by the Agencia Estatal de Investigaci{\'o}n in Spain, grant No PID2020-113462RB-I00, and by the Ministry of Interior of the Czech Republic (Flow-Based Encrypted Traffic Analysis) under grant number VJ02010024. The authors would like to thank Szymon Wojciechowski for his support on the Weles tool.},
}