Skip to content

Commit

Permalink
Merge pull request #39 from VEuPathDB/computes-refactor
Browse files Browse the repository at this point in the history
Refactor computes and correlations generic classes and methods
  • Loading branch information
d-callan authored Feb 28, 2024
2 parents 6238d84 + e511d47 commit ee8f2f4
Show file tree
Hide file tree
Showing 24 changed files with 1,173 additions and 3 deletions.
6 changes: 5 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,18 @@ Config/testthat/edition: 3
Collate:
'class-Bin.R'
'class-VariableMetadata.R'
'class-ComputeResult.R'
'class-CorrelationResult.R'
'class-Megastudy.R'
'class-Range.R'
'class-Statistic.R'
'data.R'
'method-correlation.R'
'methods-Bin.R'
'methods-VariableMetadata.R'
'methods-ComputeResult.R'
'methods-Megastudy.R'
'methods-Statistic.R'
'methods-VariableMetadata.R'
'utils-classes.R'
'utils-cut.R'
'utils-numeric.R'
Expand Down
21 changes: 21 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ S3method(validateNumericCols,default)
S3method(validateNumericCols,list)
export(Bin)
export(BinList)
export(ComputeResult)
export(CorrelationResult)
export(DataShape)
export(DataType)
export(Megastudy)
Expand All @@ -25,6 +27,7 @@ export(VariableMetadataList)
export(VariableSpec)
export(VariableSpecList)
export(as.data.table)
export(correlation)
export(cut_interval)
export(cut_number)
export(cut_width)
Expand Down Expand Up @@ -62,12 +65,15 @@ export(merge)
export(naToZero)
export(new_data_frame)
export(nonZeroRound)
export(predicateFactory)
export(removeAttr)
export(selfCorrelation)
export(setAttrFromList)
export(setNaToZero)
export(setroworder)
export(shiftToNonNeg)
export(strSplit)
export(stripEntityIdFromColumnHeader)
export(toJSON)
export(toStringOrNull)
export(toStringOrPoint)
Expand All @@ -76,8 +82,13 @@ export(updateAttrById)
export(validateNumericCols)
export(whichValuesInBin)
export(whichValuesInBinList)
export(writeData)
export(writeMeta)
export(writeStatistics)
exportClasses(Bin)
exportClasses(BinList)
exportClasses(ComputeResult)
exportClasses(CorrelationResult)
exportClasses(DataShape)
exportClasses(DataType)
exportClasses(Megastudy)
Expand Down Expand Up @@ -118,13 +129,23 @@ exportMethods(getStudyIdColumnName)
exportMethods(getVariableSpec)
exportMethods(getVariableSpecColumnName)
exportMethods(merge)
exportMethods(predicateFactory)
exportMethods(selfCorrelation)
exportMethods(toJSON)
exportMethods(whichValuesInBin)
exportMethods(whichValuesInBinList)
exportMethods(writeData)
exportMethods(writeMeta)
exportMethods(writeStatistics)
import(data.table)
importFrom(Hmisc,rcorr)
importFrom(S4Vectors,SimpleList)
importFrom(SpiecEasi,pval.sparccboot)
importFrom(SpiecEasi,sparcc)
importFrom(SpiecEasi,sparccboot)
importFrom(digest,digest)
importFrom(microbenchmark,microbenchmark)
importFrom(purrr,map)
importFrom(purrr,map_lgl)
importFrom(stringi,stri_detect_regex)
useDynLib(microbiomeComputations)
91 changes: 91 additions & 0 deletions R/class-ComputeResult.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# so S4 will recognize data.table class as inheriting from data.frame
setOldClass(c("data.table", "data.frame"))

check_compute_result <- function(object) {
errors <- character()

if (is.na(object@name)) {
msg <- "Compute result must have a name."
errors <- c(errors, msg)
} else if (length(object@name) != 1) {
msg <- "Compute result name must have a single value."
errors <- c(errors, msg)
}

# If computedVariableMetadata is supplied, check that it contains
# metadata for variables actually in the result data, and that
# the variable classes are correct.
if (!!length(object@computedVariableMetadata)) {
variables <- object@computedVariableMetadata
col_names <- stripEntityIdFromColumnHeader(veupathUtils::findAllColNames(variables))

if (!all(col_names %in% names(object@data))) {
msg <- paste("Some specified computed variables are not present in compute result data.frame")
errors <- c(errors, msg)
}

var_classes <- unlist(lapply(as.list(variables), function(x) {x@variableClass@value}))
if (!all(var_classes %in% 'computed')) {
msg <- paste("Some specified computed variables have the wrong variable class.")
errors <- c(errors, msg)
}

# if we have computed variable metadata, we should have data and vice versa
if (!length(object@data) || nrow(object@data) == 0) {
msg <- "Compute result must include computed variable metadata or data."
errors <- c(errors, msg)
} else {
if (any(grepl(".", names(object@data), fixed = TRUE))) {
msg <- paste("Column headers appear to be in dot notation [entityId.variableId]. They should be the raw variableId.")
errors <- c(errors, msg)
}

expectedOutputIdColHeaders <- stripEntityIdFromColumnHeader(c(object@recordIdColumn, object@ancestorIdColumns))
actualOutputIdColHeaders <- names(object@data)[1:length(expectedOutputIdColHeaders)]
if (all(expectedOutputIdColHeaders != actualOutputIdColHeaders)) {
msg <- paste("Columns must be ordered by recordIdColumn, ancestorIdColumns, and then data columns.")
errors <- c(errors, msg)
}
}
} else if (!length(object@statistics)) {
msg <- "Compute result must include computed variable metadata or statistics."
errors <- c(errors, msg)
}

return(if (length(errors) == 0) TRUE else errors)
}

#' Compute Result
#'
#' A class for consistently representing the results of computes.
#' This includes their representation in R, as JSON and how they are written to files.
#'
#' @slot data A data.frame of values where computed variables are columns and samples rows.
#' @slot name The name of the compute, ex: 'alphaDiv'.
#' @slot recordIdColumn The name of the column containing IDs for the samples. All other columns will be treated as computed values.
#' @slot ancestorIdColumns A character vector of column names representing parent entities of the recordIdColumn.
#' @slot computedVariableMetadata veupathUtils::VariableMetadataList detailing the computed variables.
#' @slot statistics An optional slot of any values. List or data.frame are recommended. It is not required to have rows or cols map to samples.
#' @slot computationDetails An optional message about the computed results.
#' @slot parameters A record of the input parameters used to generate the computed results.
#' @slot droppedColumns A character vector of column names that have been dropped for being unsuitable for the computation.
#' @name ComputeResult-class
#' @rdname ComputeResult-class
#' @export
#' @include class-VariableMetadata.R
ComputeResult <- setClass("ComputeResult", representation(
name = 'character',
data = 'data.frame',
recordIdColumn = 'character',
ancestorIdColumns = 'character',
computedVariableMetadata = 'VariableMetadataList',
statistics = 'ANY',
computationDetails = 'character',
parameters = 'character',
droppedColumns = 'character'
), prototype = prototype(
name = NA_character_,
recordIdColumn = NA_character_,
computationDetails = NA_character_,
parameters = NA_character_
), validity = check_compute_result)
34 changes: 34 additions & 0 deletions R/class-CorrelationResult.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
check_correlation_result <- function(object) {
errors <- character()

if (!length(object@data1Metadata) > 0) {
msg <- "Compute result must include information about data1 in the data1Metadata slot"
errors <- c(errors, msg)
} else if (!length(object@data2Metadata) > 0) {
msg <- "Compute result must include information about data2 in the data2Metadata slot"
errors <- c(errors, msg)
}

return(if (length(errors) == 0) TRUE else errors)
}


#' Corrleation Result
#'
#' A class to represent the computed results from a correlation calculation.
#' This includes their representation in R, as JSON, and how they are written to files.
#'
#' @slot statistics An optional slot of any values. List or data.frame are recommended. It is not required to have rows or cols map to samples.
#' @slot data1Metadata A character describing data1. Should at least describe the name of data1.
#' @slot data2Metadata A character describing data2. Should at least describe the name of data2.
#' @name CorrelationResult-class
#' @rdname CorrelationResult-class
#' @export
CorrelationResult <- setClass("CorrelationResult", representation(
data1Metadata = "character",
data2Metadata = "character",
statistics = "data.frame"
), prototype = prototype(
data1Metadata = NA_character_,
data2Metadata = NA_character_
), validity = check_correlation_result)
Loading

0 comments on commit ee8f2f4

Please sign in to comment.