diff --git a/modules/nf-core/custom/matrixfilter/meta.yml b/modules/nf-core/custom/matrixfilter/meta.yml
index 3ba2d0cb829..99d7ca4e864 100644
--- a/modules/nf-core/custom/matrixfilter/meta.yml
+++ b/modules/nf-core/custom/matrixfilter/meta.yml
@@ -72,6 +72,13 @@ input:
         Minimum observations that must have a numeric value (not be NA) to retain
         the row/ feature (e.g. gene). Supplied via task.ext.args as
         --minimum_samples_not_na. Overrides minimum_proportion_not_na
+  - most_variant_features:
+      type: integer
+      description: |
+        Number of most variant rows/ features (e.g. genes) to retain, ranked by
+        variance across observations. Applied together with the other filters, so
+        fewer rows may be returned. Supplied via task.ext.args as --most_variant_features
+
 output:
   - versions:
       type: file
diff --git a/modules/nf-core/custom/matrixfilter/templates/matrixfilter.R b/modules/nf-core/custom/matrixfilter/templates/matrixfilter.R
old mode 100644
new mode 100755
index 331f1a0607c..7e53938a7cb
--- a/modules/nf-core/custom/matrixfilter/templates/matrixfilter.R
+++ b/modules/nf-core/custom/matrixfilter/templates/matrixfilter.R
@@ -69,6 +69,34 @@ read_delim_flexible <- function(file, header = TRUE, row.names = NULL, nrows = -
     )
 }
 
+#' Identify rows that are among the top n most variant
+#'
+#' @param matrix_data Matrix object with features (e.g. genes) in rows
+#' @param n_features Number of most variant rows to flag. Defaults to the
+#'   most_variant_features option
+#'
+#' @return Boolean vector with one element per row of matrix_data, TRUE for
+#'   rows ranked among the n_features most variant
+
+most_variant_test <- function(matrix_data, n_features = opt\$most_variant_features) {
+
+    # The count is supplied through task.ext.args; coerce it and reject anything
+    # that is not a single non-negative whole number
+    n_features <- suppressWarnings(as.integer(n_features))
+    if (length(n_features) != 1 || is.na(n_features) || n_features < 0) {
+        stop("most_variant_features must be a single non-negative integer", call. = FALSE)
+    }
+
+    # Rank rows by decreasing variance (rows with undefined variance sort last)
+    # and take the first n_features. head() returns nothing for 0 and stops at
+    # the last row, unlike 1:n indexing, which counts down for 0 and pads with NA
+    row_variances <- apply(matrix_data, 1, var, na.rm = TRUE)
+    top_indices <- head(order(-row_variances), n_features)
+
+    # Return a boolean vector indicating if each row is among the top variant ones
+    seq_len(nrow(matrix_data)) %in% top_indices
+}
+
 # Set up default options
 
 opt <- list(
@@ -80,7 +108,8 @@ opt <- list(
     minimum_proportion = 0,
     grouping_variable = NULL,
     minimum_proportion_not_na = 0.5,
-    minimum_samples_not_na = NULL
+    minimum_samples_not_na = NULL,
+    most_variant_features = NULL
 )
 opt_types <- lapply(opt, class)
 
@@ -176,6 +205,14 @@ boolean_matrix <- t(apply(abundance_matrix, 1, function(row) {
     sapply(tests, function(f) f(row))
 }))
 
+# Apply the 'most_variant_test' function to identify the most variant rows and add
+# the result to the boolean matrix
+
+if (! is.null(opt\$most_variant_features)) {
+    most_variant_vectors <- most_variant_test(abundance_matrix)
+    boolean_matrix <- cbind(boolean_matrix, most_variant_vectors)
+}
+
 # We will retain features passing all tests
 
 keep <- apply(boolean_matrix, 1, all)
diff --git a/tests/modules/nf-core/custom/matrixfilter/main.nf b/tests/modules/nf-core/custom/matrixfilter/main.nf
index 1484ef7c66b..4f95cb1a607 100644
--- a/tests/modules/nf-core/custom/matrixfilter/main.nf
+++ b/tests/modules/nf-core/custom/matrixfilter/main.nf
@@ -71,3 +71,15 @@ workflow test_custom_matrixfilter_na_samples {
         ch_samplesheet
     )
 }
+
+workflow test_custom_matrixfilter_var {
+
+    expression_matrix_file = file(params.test_data['mus_musculus']['genome']['rnaseq_matrix'], checkIfExists: true)
+
+    ch_samples_matrix = [ [ "id":"SRP254919" ], expression_matrix_file ]
+
+    CUSTOM_MATRIXFILTER(
+        ch_samples_matrix,
+        empty_samplesheet
+    )
+}
diff --git a/tests/modules/nf-core/custom/matrixfilter/nextflow.config b/tests/modules/nf-core/custom/matrixfilter/nextflow.config
index 25260f207a0..cd0925f534e 100644
--- a/tests/modules/nf-core/custom/matrixfilter/nextflow.config
+++ b/tests/modules/nf-core/custom/matrixfilter/nextflow.config
@@ -26,4 +26,8 @@ process {
         ext.args = { "--sample_id_col Experiment --minimum_abundance 28 --minimum_samples_not_na 1" }
         ext.prefix = { "${meta.id}_test" }
     }
+    withName: 'test_custom_matrixfilter_var:CUSTOM_MATRIXFILTER' {
+        ext.args = { "--most_variant_features 500" }
+        ext.prefix = { "${meta.id}_test" }
+    }
 }