Skip to content

Commit

Permalink
Merge pull request #246 from OHDSI/develop
Browse files Browse the repository at this point in the history
Release v3.5.0
  • Loading branch information
ginberg committed Apr 18, 2024
2 parents 5049d54 + b8c964b commit 4a10c69
Show file tree
Hide file tree
Showing 71 changed files with 410 additions and 80 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: FeatureExtraction
Type: Package
Title: Generating Features for a Cohort
Version: 3.4.1
Date: 2024-03-28
Version: 3.5.0
Date: 2024-04-18
Authors@R: c(
person("Martijn", "Schuemie", , "schuemie@ohdsi.org", role = c("aut")),
person("Marc", "Suchard", role = c("aut")),
Expand Down Expand Up @@ -44,6 +44,6 @@ VignetteBuilder: knitr
URL: https://github.com/OHDSI/FeatureExtraction
BugReports: https://github.com/OHDSI/FeatureExtraction/issues
NeedsCompilation: no
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Encoding: UTF-8
Language: en-US
13 changes: 12 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
FeatureExtraction 3.5.0
=======================

New Features:

- Adds the ability to filter covariates by setting a minimum threshold for covariate mean (#174)

Bug Fixes:

- Table 1 - does not report correct subject count (#237)

FeatureExtraction 3.4.1
=======================

Bug Fixes:

- Weely R-check fails (#239)
- Weekly R-check fails (#239)
- BigQuery error (#208)
- Error when specifying 1 temporal window in temporalCovariateSettings (#200)
- metaData aggregation issue (#195)
Expand Down
13 changes: 11 additions & 2 deletions R/GetCovariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@
#' of the createCovariate functions, or a list of such objects.
#' @param aggregated Should aggregate statistics be computed instead of covariates per
#' cohort entry?
#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This
#' will help reduce the file size of the characterization output, but will remove information
#' on covariates that have very low values. The default is 0.
#'
#' @return
#' Returns an object of type \code{covariateData}, containing information on the covariates.
Expand Down Expand Up @@ -101,7 +104,8 @@ getDbCovariateData <- function(connectionDetails = NULL,
cohortIds = c(-1),
rowIdField = "subject_id",
covariateSettings,
aggregated = FALSE) {
aggregated = FALSE,
minCharacterizationMean = 0) {
if (is.null(connectionDetails) && is.null(connection)) {
stop("Need to provide either connectionDetails or connection")
}
Expand All @@ -115,6 +119,10 @@ getDbCovariateData <- function(connectionDetails = NULL,
warning("cohortId argument has been deprecated, please use cohortIds")
cohortIds <- cohortId
}
errorMessages <- checkmate::makeAssertCollection()
minCharacterizationMean <- utils::type.convert(minCharacterizationMean, as.is = TRUE)
checkmate::assertNumeric(x = minCharacterizationMean, lower = 0, add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)
if (!is.null(connectionDetails)) {
connection <- DatabaseConnector::connect(connectionDetails)
on.exit(DatabaseConnector::disconnect(connection))
Expand Down Expand Up @@ -164,7 +172,8 @@ getDbCovariateData <- function(connectionDetails = NULL,
cdmVersion = cdmVersion,
rowIdField = rowIdField,
covariateSettings = covariateSettings[[i]],
aggregated = aggregated)
aggregated = aggregated,
minCharacterizationMean = minCharacterizationMean)
tempCovariateData <- do.call(eval(parse(text = fun)), args)
if (is.null(covariateData)) {
covariateData <- tempCovariateData
Expand Down
12 changes: 9 additions & 3 deletions R/GetCovariatesFromOtherCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
#' @param covariateSettings An object of type \code{covariateSettings} as created using the
#' \code{\link{createCohortBasedCovariateSettings}} or
#' \code{\link{createCohortBasedTemporalCovariateSettings}} functions.
#'
#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This
#' will help reduce the file size of the characterization output, but will remove information
#' on covariates that have very low values. The default is 0.
#' @template GetCovarParams
#'
#' @export
Expand All @@ -35,7 +37,8 @@ getDbCohortBasedCovariatesData <- function(connection,
cdmVersion = "5",
rowIdField = "subject_id",
covariateSettings,
aggregated = FALSE) {
aggregated = FALSE,
minCharacterizationMean = 0) {
errorMessages <- checkmate::makeAssertCollection()
checkmate::assertClass(connection, "DatabaseConnectorConnection", add = errorMessages)
checkmate::assertCharacter(oracleTempSchema, len = 1, null.ok = TRUE, add = errorMessages)
Expand All @@ -46,6 +49,8 @@ getDbCohortBasedCovariatesData <- function(connection,
checkmate::assertCharacter(rowIdField, len = 1, add = errorMessages)
checkmate::assertClass(covariateSettings, "covariateSettings", add = errorMessages)
checkmate::assertLogical(aggregated, len = 1, add = errorMessages)
minCharacterizationMean <- utils::type.convert(minCharacterizationMean, as.is = TRUE)
checkmate::assertNumeric(x = minCharacterizationMean, lower = 0, add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)
if (!missing(cohortId)) {
warning("cohortId argument has been deprecated, please use cohortIds")
Expand Down Expand Up @@ -139,7 +144,8 @@ getDbCohortBasedCovariatesData <- function(connection,
cdmVersion = cdmVersion,
rowIdField = rowIdField,
covariateSettings = detailledSettings,
aggregated = aggregated
aggregated = aggregated,
minCharacterizationMean = minCharacterizationMean
)

sql <- "TRUNCATE TABLE #covariate_cohort_ref; DROP TABLE #covariate_cohort_ref;"
Expand Down
27 changes: 26 additions & 1 deletion R/GetDefaultCovariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
#' it is a temp table, do not specify \code{targetDatabaseSchema}.
#' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored.
#' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored.
#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This
#' will help reduce the file size of the characterization output, but will remove information
#' on covariates that have very low values. The default is 0.
#'
#' @template GetCovarParams
#'
Expand Down Expand Up @@ -65,7 +68,8 @@ getDbDefaultCovariateData <- function(connection,
targetCovariateTable,
targetCovariateRefTable,
targetAnalysisRefTable,
aggregated = FALSE) {
aggregated = FALSE,
minCharacterizationMean = 0) {
if (!is(covariateSettings, "covariateSettings")) {
stop("Covariate settings object not of type covariateSettings")
}
Expand All @@ -79,6 +83,11 @@ getDbDefaultCovariateData <- function(connection,
warning("cohortId argument has been deprecated, please use cohortIds")
cohortIds <- cohortId
}
errorMessages <- checkmate::makeAssertCollection()
minCharacterizationMean <- utils::type.convert(minCharacterizationMean, as.is = TRUE)
checkmate::assertNumeric(x = minCharacterizationMean, lower = 0, add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)

settings <- .toJson(covariateSettings)
rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$init(system.file("", package = "FeatureExtraction"))
json <- rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$createSql(settings, aggregated, cohortTable, rowIdField, rJava::.jarray(as.character(cohortIds)), cdmDatabaseSchema)
Expand Down Expand Up @@ -126,6 +135,7 @@ getDbDefaultCovariateData <- function(connection,
andromedaTableName = "covariates",
snakeCaseToCamelCase = TRUE
)
filterCovariateDataCovariates(covariateData, "covariates", minCharacterizationMean)
}

# Continuous aggregated features
Expand All @@ -142,6 +152,7 @@ getDbDefaultCovariateData <- function(connection,
andromedaTableName = "covariatesContinuous",
snakeCaseToCamelCase = TRUE
)
filterCovariateDataCovariates(covariateData, "covariatesContinuous", minCharacterizationMean)
}

# Covariate reference
Expand Down Expand Up @@ -273,3 +284,17 @@ getDbDefaultCovariateData <- function(connection,
return(covariateData)
}
}

#' Filters the covariateData covariates based on the given characterization mean value.
#'
#' Keeps only the rows of \code{covariateData[[covariatesName]]} whose \code{averageValue}
#' is greater than or equal to \code{minCharacterizationMean}. Nothing is changed when the
#' table has no \code{averageValue} column, or when \code{minCharacterizationMean} is 0 or NA.
#'
#' @param covariateData The covariate data
#' @param covariatesName The name of the covariates object inside the covariateData
#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This
#'                                will help reduce the file size of the characterization output, but will remove information
#'                                on covariates that have very low values. Values equal to the minimum are kept.
#'                                The default is 0, which (like NA) disables filtering.
filterCovariateDataCovariates <- function(covariateData, covariatesName, minCharacterizationMean = 0) {
  # 0 (the default) and NA both mean "do not filter". Testing is.na() before the
  # comparison keeps this scalar condition from failing on a missing value.
  # Review note: means can be negative, so 0 is not a natural "no-action" value;
  # NA is accepted here as an explicit alternative.
  if ("averageValue" %in% colnames(covariateData[[covariatesName]]) &&
    !is.na(minCharacterizationMean) &&
    minCharacterizationMean != 0) {
    # Use >= rather than >: the parameter is documented as the *minimum* mean, so
    # a covariate whose mean is exactly the threshold must be retained.
    # NOTE(review): callers in this file discard the return value, so this
    # assignment presumably relies on covariateData having reference semantics --
    # confirm against the covariateData class.
    covariateData[[covariatesName]] <- covariateData[[covariatesName]] %>%
      dplyr::filter(.data$averageValue >= minCharacterizationMean)
  }
}
17 changes: 14 additions & 3 deletions R/Table1.R
Original file line number Diff line number Diff line change
Expand Up @@ -562,18 +562,20 @@ createTable1 <- function(covariateData1,

if (nrow(binaryTable) != 0) {
if (comparison) {
populationSize1 <- getPopulationSize(covariateData1, cohortId1)
populationSize2 <- getPopulationSize(covariateData2, cohortId2)
colnames(binaryTable) <- c(
"Characteristic",
"Count",
paste0(
"% (n = ",
formatCount(attr(covariateData1, "metaData")$populationSize),
formatCount(populationSize1),
")"
),
"Count",
paste0(
"% (n = ",
formatCount(attr(covariateData2, "metaData")$populationSize),
formatCount(populationSize2),
")"
),
"Std.Diff"
Expand All @@ -590,12 +592,13 @@ createTable1 <- function(covariateData1,
binaryTable$count2 <- NULL
binaryTable$percent2 <- NULL
binaryTable$stdDiff <- NULL
populationSize1 <- getPopulationSize(covariateData1, cohortId1)
colnames(binaryTable) <- c(
"Characteristic",
"Count",
paste0(
"% (n = ",
formatCount(attr(covariateData1, "metaData")$populationSize),
formatCount(populationSize1),
")"
)
)
Expand Down Expand Up @@ -722,3 +725,11 @@ createTable1CovariateSettings <- function(specifications = getDefaultTable1Speci
covariateSettings$analyses <- analyses
return(covariateSettings)
}

# Reads the population size stored in the "metaData" attribute of a
# covariateData object, optionally narrowed to one cohort.
#
# covariateData: object carrying a "metaData" attribute with a populationSize
#                element.
# cohortId:      cohort to report on, or NULL to return populationSize as stored.
#
# Returns the selected populationSize value(s).
getPopulationSize <- function(covariateData, cohortId) {
  result <- attr(covariateData, "metaData")$populationSize
  if (!is.null(cohortId)) {
    cohortName <- as.character(cohortId)
    if (!is.null(names(result)) && all(cohortName %in% names(result))) {
      # When the entries are labelled by cohort ID, look them up by name. A
      # numeric index selects by position, which gives the wrong entry (or NA)
      # whenever the cohort IDs are not exactly 1..n in order.
      result <- result[cohortName]
    } else {
      # Unnamed or differently-labelled sizes: keep the positional lookup.
      result <- result[cohortId]
    }
  }
  return(result)
}
7 changes: 5 additions & 2 deletions R/UnitTestHelperFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@
#' of the createCovariate functions, or a list of such objects.
#' @param aggregated Should aggregate statistics be computed instead of covariates per
#' cohort entry?
#'
#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This
#' will help reduce the file size of the characterization output, but will remove information
#' on covariates that have very low values. The default is 0.
#' @return
#' Returns an object of type \code{covariateData}, containing information on the covariates.
#'
Expand Down Expand Up @@ -94,7 +96,8 @@
cdmVersion = "5",
rowIdField = "subject_id",
covariateSettings,
aggregated = FALSE) {
aggregated = FALSE,
minCharacterizationMean = 0) {
writeLines("Constructing length of observation covariates")
if (covariateSettings$useLengthOfObs == FALSE) {
return(NULL)
Expand Down
2 changes: 1 addition & 1 deletion docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions docs/articles/CreatingCovariatesBasedOnOtherCohorts.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions docs/articles/CreatingCovariatesUsingCohortAttributes.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions docs/articles/CreatingCustomCovariateBuilders.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions docs/articles/CreatingCustomCovariateBuildersKorean.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions docs/articles/UsingFeatureExtraction.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions docs/articles/UsingFeatureExtractionKorean.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/index.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 4a10c69

Please sign in to comment.