Journal article describing the KnowSeq R/Bioconductor package, exported from
an institutional repository (see the handle URL). The journal name is derived
from the DOI; volume and article number are not recorded here and should be
added after checking the publisher record. The two funders and the five
keywords were originally given as repeated fields and are merged below so no
value is dropped by the parser.

@article{10481/69620,
  author       = {Castillo Secilla, Daniel and Gálvez Gómez, Juan Manuel and Carrillo Pérez, Francisco and Verona Almeida, Marta and Redondo Sánchez, Daniel and Herrera Maldonado, Luis Javier and Rojas Ruiz, Ignacio},
  title        = {{KnowSeq} {R-Bioc} package: The automatic smart gene expression tool for retrieving relevant biological knowledge},
  journal      = {Computers in Biology and Medicine},
  year         = {2021},
  month        = apr,
  publisher    = {Elsevier},
  doi          = {10.1016/j.compbiomed.2021.104387},
  url          = {http://hdl.handle.net/10481/69620},
  keywords     = {Bioconductor, Gene expression, Classification, Enrichment, Bioinformatics},
  organization = {Spanish Ministry of Sciences, Innovation and Universities RTI2018-101674-B-I00 and Government of Andalusia P12TIC2082},
  abstract     = {KnowSeq R/Bioc package is designed as a powerful, scalable and modular software focused on automatizing and assembling renowned bioinformatic tools with new features and functionalities. It comprises a unified environment to perform complex gene expression analyses, covering all the needed processing steps to identify a gene signature for a specific disease to gather understandable knowledge. This process may be initiated from raw files either available at well-known platforms or provided by the users themselves, and in either case coming from different information sources and different Transcriptomic technologies. The pipeline makes use of a set of advanced algorithms, including the adaptation of a novel procedure for the selection of the most representative genes in a given multiclass problem. Similarly, an intelligent system able to classify new patients, providing the user the opportunity to choose one among a number of well-known and widespread classification and feature selection methods in Bioinformatics, is embedded. Furthermore, KnowSeq is engineered to automatically develop a complete and detailed HTML report of the whole process which is also modular and scalable. Biclass breast cancer and multiclass lung cancer study cases were addressed to rigorously assess the usability and efficiency of KnowSeq. The models built by using the Differential Expressed Genes achieved from both experiments reach high classification rates. Furthermore, biological knowledge was extracted in terms of Gene Ontologies, Pathways and related diseases with the aim of helping the expert in the decision-making process.
KnowSeq is available at Bioconductor (https://bioconductor.org/packages/KnowSeq), GitHub (https://github.com/CasedUgr/KnowSeq) and Docker (https://hub.docker.com/r/casedugr/knowseq).},
}