Skip to content

Commit

Permalink
Added variance filtering to custom/matrixfilter (#5358)
Browse files Browse the repository at this point in the history
* Added variant filtering to matrix filter

* Corrected indentation

* Simplified function in single conditional block

* fixed line indentation

* Fixed whitespace

* Fixed logic

* line wrapping change

* added whitespace around comments

* [automated] Fix linting with Prettier

* remove trailing whitespace

* Update matrixfilter.R

* Remove whitespace

---------

Co-authored-by: Jonathan Manning <jonathan.manning@seqera.io>
Co-authored-by: nf-core-bot <core@nf-co.re>
Co-authored-by: Jonathan Manning <pininforthefjords@gmail.com>
Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com>
  • Loading branch information
5 people authored Jun 28, 2024
1 parent 2dfe9af commit bf3fa81
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 1 deletion.
6 changes: 6 additions & 0 deletions modules/nf-core/custom/matrixfilter/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ input:
Minimum observations that must have a numeric value (not be NA) to retain
the row/ feature (e.g. gene). Supplied via task.ext.args as
--minimum_samples_not_na. Overrides minimum_proportion_not_na
- most_variant_features:
type: integer
description: |
Number of most variant rows/ features (e.g. genes) to retain, ranked by variance across observations.
Supplied via task.ext.args as --most_variant_features
output:
- versions:
type: file
Expand Down
26 changes: 25 additions & 1 deletion modules/nf-core/custom/matrixfilter/templates/matrixfilter.R
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,21 @@ read_delim_flexible <- function(file, header = TRUE, row.names = NULL, nrows = -
)
}

#' Identify rows that are among the top n most variant
#'
#' Row variances are computed with `var(..., na.rm = TRUE)`. Rows whose
#' variance is `NA` sort last, so they are only flagged once every row with a
#' defined variance has already been taken.
#'
#' @param matrix_data Matrix object; variance is computed across each row
#' @param n_features Number of most variant rows to flag. Defaults to the
#'   `most_variant_features` entry of the global `opt` list. A numeric string
#'   such as "500" is accepted and coerced to an integer.
#'
#' @return output Boolean vector with one element per row of `matrix_data`,
#'   `TRUE` for rows that are among the `n_features` most variant

most_variant_test <- function(
    matrix_data,
    n_features = opt[["most_variant_features"]]
) {

    # Coerce up front so a value supplied as text behaves like a number, and
    # fail with a clear message instead of an obscure subscript error
    n_features <- as.integer(n_features)
    if (length(n_features) != 1 || is.na(n_features) || n_features < 0) {
        stop(
            "most_variant_features must be a single non-negative integer",
            call. = FALSE
        )
    }

    # Variance of every row, ignoring missing values
    row_variances <- apply(matrix_data, 1, var, na.rm = TRUE)

    # Take at most n_features indices. seq_len() rather than 1:n so that
    # n = 0 selects nothing (1:0 would yield c(1, 0) and select one row) and
    # n larger than the number of rows does not create NA indices
    n_selected <- min(n_features, length(row_variances))
    top_indices <- order(-row_variances)[seq_len(n_selected)]

    # Return a boolean vector indicating if each row is among the top variant
    # ones; seq_len() keeps the result empty for a matrix with no rows
    seq_len(nrow(matrix_data)) %in% top_indices
}

# Set up default options

opt <- list(
Expand All @@ -80,7 +95,8 @@ opt <- list(
minimum_proportion = 0,
grouping_variable = NULL,
minimum_proportion_not_na = 0.5,
minimum_samples_not_na = NULL
minimum_samples_not_na = NULL,
most_variant_features = NULL
)
opt_types <- lapply(opt, class)

Expand Down Expand Up @@ -176,6 +192,14 @@ boolean_matrix <- t(apply(abundance_matrix, 1, function(row) {
sapply(tests, function(f) f(row))
}))

# Optional variance filter: only when --most_variant_features was supplied,
# flag the most variant rows and append that flag as an extra column of the
# boolean matrix alongside the per-row test results

if (!is.null(opt[["most_variant_features"]])) {
    most_variant_vectors <- most_variant_test(matrix_data = abundance_matrix)
    boolean_matrix <- cbind(
        boolean_matrix,
        most_variant_vectors = most_variant_vectors
    )
}

# We will retain features passing all tests

keep <- apply(boolean_matrix, 1, all)
Expand Down
12 changes: 12 additions & 0 deletions tests/modules/nf-core/custom/matrixfilter/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,15 @@ workflow test_custom_matrixfilter_na_samples {
ch_samplesheet
)
}

workflow test_custom_matrixfilter_var {

    // Test input: a meta map plus the mouse RNA-seq matrix from the test data
    // (the file must exist)
    ch_samples_matrix = [
        [ id: "SRP254919" ],
        file(
            params.test_data['mus_musculus']['genome']['rnaseq_matrix'],
            checkIfExists: true
        )
    ]

    // Run the filter with the samplesheet input taken from empty_samplesheet
    CUSTOM_MATRIXFILTER(ch_samples_matrix, empty_samplesheet)
}
4 changes: 4 additions & 0 deletions tests/modules/nf-core/custom/matrixfilter/nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,8 @@ process {
ext.args = { "--sample_id_col Experiment --minimum_abundance 28 --minimum_samples_not_na 1" }
ext.prefix = { "${meta.id}_test" }
}
// Settings applied to CUSTOM_MATRIXFILTER when run from the
// test_custom_matrixfilter_var workflow
withName: 'test_custom_matrixfilter_var:CUSTOM_MATRIXFILTER' {
// Request the variance filter with a limit of 500 features
ext.args = { "--most_variant_features 500" }
// Evaluates to the meta id followed by "_test"
ext.prefix = { "${meta.id}_test" }
}
}

0 comments on commit bf3fa81

Please sign in to comment.