Release v3.4.0

OHDSI · Feb 5, 2024 · 8a7cb70 · 8a7cb70
2 parents 6a7e90a + cfb3962
commit 8a7cb70
Show file tree

Hide file tree

Showing 124 changed files with 1,207 additions and 1,276 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: FeatureExtraction
 Type: Package
 Title: Generating Features for a Cohort
-Version: 3.3.2
-Date: 2023-11-20
+Version: 3.4.0
+Date: 2024-02-02
 Authors@R: c(
  person("Martijn", "Schuemie", , "schuemie@ohdsi.org", role = c("aut")),
  person("Marc", "Suchard", role = c("aut")),

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,16 @@
+FeatureExtraction 3.4.0
+=======================
+
+Bug Fixes:
+
+- Standardized difference of means (SDM) calculation fix (#223)
+- Each covariate has a unique covariate name (#219)
+- R check (notes) (#211, #215)
+
+Other:
+
+- Deprecate cohortId and use cohortIds (#188)
+
 FeatureExtraction 3.3.2
 =======================
 
@@ -186,4 +199,4 @@ FeatureExtraction 2.2.0
 
 Changes:
 
-1. Added the Hospital Frailty Risk Score.
+1. Added the Hospital Frailty Risk Score.
diff --git a/R/Aggregation.R b/R/Aggregation.R
@@ -1,4 +1,4 @@
-# Copyright 2023 Observational Health Data Sciences and Informatics
+# Copyright 2024 Observational Health Data Sciences and Informatics
 #
 # This file is part of FeatureExtraction
 #
@@ -26,7 +26,7 @@
 #' @examples
 #' \dontrun{
 #' covariateData <- FeatureExtraction:::createEmptyCovariateData(
-#' cohortId = 1,
+#' cohortIds = 1,
 #' aggregated = FALSE,
 #' temporal = FALSE
 #' )

diff --git a/R/CompareCohorts.R b/R/CompareCohorts.R
@@ -1,4 +1,4 @@
-# Copyright 2023 Observational Health Data Sciences and Informatics
+# Copyright 2024 Observational Health Data Sciences and Informatics
 #
 # This file is part of FeatureExtraction
 #
@@ -64,7 +64,7 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort
  covariates1 <- covariateData1$covariates
  if (!is.null(cohortId1)) {
  covariates1 <- covariates1 %>%
- filter(cohortDefinitionId == cohortId1)
+ filter(.data$cohortDefinitionId == cohortId1)
  }
  covariates1 <- covariates1 %>%
  select(
@@ -76,7 +76,7 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort
  covariates2 <- covariateData2$covariates
  if (!is.null(cohortId2)) {
  covariates2 <- covariates2 %>%
- filter(cohortDefinitionId == cohortId2)
+ filter(.data$cohortDefinitionId == cohortId2)
  }
  covariates2 <- covariates2 %>%
  select(
@@ -108,7 +108,7 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort
  covariates1 <- covariateData1$covariatesContinuous
  if (!is.null(cohortId1)) {
  covariates1 <- covariates1 %>%
- filter(cohortDefinitionId == cohortId1)
+ filter(.data$cohortDefinitionId == cohortId1)
  }
  covariates1 <- covariates1 %>%
  select(
@@ -121,7 +121,7 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort
  covariates2 <- covariateData2$covariatesContinuous
  if (!is.null(cohortId2)) {
  covariates2 <- covariates2 %>%
- filter(cohortDefinitionId == cohortId2)
+ filter(.data$cohortDefinitionId == cohortId2)
  }
  covariates2 <- covariates2 %>%
  select(
@@ -136,7 +136,7 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort
  m$sd1[is.na(m$sd1)] <- 0
  m$mean2[is.na(m$mean2)] <- 0
  m$sd2[is.na(m$sd2)] <- 0
- m$sd <- sqrt(m$sd1^2 + m$sd2^2)
+ m$sd <- sqrt((m$sd1^2 + m$sd2^2) / 2)
  m$stdDiff <- (m$mean2 - m$mean1) / m$sd
  result <- bind_rows(result, m[, c("covariateId", "mean1", "sd1", "mean2", "sd2", "sd", "stdDiff")])
  }

diff --git a/R/CovariateData.R b/R/CovariateData.R
@@ -1,4 +1,4 @@
-# Copyright 2023 Observational Health Data Sciences and Informatics
+# Copyright 2024 Observational Health Data Sciences and Informatics
 #
 # This file is part of FeatureExtraction
 #
@@ -55,7 +55,7 @@ setClass("CovariateData", contains = "Andromeda")
 #' @examples
 #' \dontrun{
 #' covariateData <- FeatureExtraction:::createEmptyCovariateData(
-#' cohortId = 1,
+#' cohortIds = 1,
 #' aggregated = FALSE,
 #' temporal = FALSE
 #' )
@@ -128,13 +128,15 @@ loadCovariateData <- function(file, readOnly) {
 setMethod("show", "CovariateData", function(object) {
  cli::cat_line(pillar::style_subtle("# CovariateData object"))
  cli::cat_line("")
- cohortId <- attr(object, "metaData")$cohortId
- if (length(cohortId) > 1) {
- cli::cat_line(paste("Cohorts of interest IDs:", paste(cohortId, collapse = ", ")))
- } else if (cohortId == -1) {
- cli::cat_line("All cohorts")
- } else {
- cli::cat_line(paste("Cohort of interest ID:", cohortId))
+ cohortIds <- attr(object, "metaData")$cohortIds
+ if (!is.null(cohortIds)) {
+ if (length(cohortIds) > 1) {
+ cli::cat_line(paste("Cohorts of interest IDs:", paste(cohortIds, collapse = ", ")))
+ } else if (cohortIds == -1) {
+ cli::cat_line("All cohorts")
+ } else {
+ cli::cat_line(paste("Cohort of interest ID:", cohortIds))
+ }
  }
  cli::cat_line("")
  cli::cat_line(pillar::style_subtle("Inherits from Andromeda:"))
@@ -204,7 +206,7 @@ isCovariateData <- function(x) {
 #' @examples
 #' \dontrun{
 #' covariateData <- FeatureExtraction:::createEmptyCovariateData(
-#' cohortId = 1,
+#' cohortIds = 1,
 #' aggregated = FALSE,
 #' temporal = FALSE
 #' )
@@ -232,7 +234,7 @@ isAggregatedCovariateData <- function(x) {
 #' @examples
 #' \dontrun{
 #' covariateData <- FeatureExtraction:::createEmptyCovariateData(
-#' cohortId = 1,
+#' cohortIds = 1,
 #' aggregated = FALSE,
 #' temporal = FALSE
 #' )
@@ -252,20 +254,21 @@ isTemporalCovariateData <- function(x) {
 
 #' Creates an empty covariate data object
 #'
-#' @param cohortId cohort number
+#' @param cohortIds For which cohort IDs should the covariate data be created?
 #' @param aggregated if the data should be aggregated
 #' @param temporal if the data is temporal
 #'
 #' @examples
 #' \dontrun{
 #' covariateData <- FeatureExtraction:::createEmptyCovariateData(
-#' cohortId = 1,
+#' cohortIds = 1,
 #' aggregated = FALSE,
 #' temporal = FALSE
 #' )
 #' }
+#' @return the empty CovariateData object
 #'
-createEmptyCovariateData <- function(cohortId, aggregated, temporal) {
+createEmptyCovariateData <- function(cohortIds, aggregated, temporal) {
  dummy <- tibble(
  covariateId = 1,
  covariateValue = 1
@@ -276,28 +279,20 @@ createEmptyCovariateData <- function(cohortId, aggregated, temporal) {
  if (!is.null(temporal) && temporal) {
  dummy$timeId <- 1
  }
- covariateData <- Andromeda::andromeda(
- covariates = dummy[!1, ],
- covariateRef = tibble(
- covariateId = 1,
- covariateName = "",
- analysisId = 1,
- conceptId = 1
- )[!1, ],
- analysisRef = tibble(
- analysisId = 1,
- analysisName = "",
- domainId = "",
- startDay = 1,
- endDay = 1,
- isBinary = "",
- missingMeansZero = ""
- )[!1, ]
- )
- attr(covariateData, "metaData") <- list(
- populationSize = 0,
- cohortId = cohortId
- )
+ covariateData <- Andromeda::andromeda(covariates = dummy[!1, ],
+ covariateRef = tibble(covariateId = 1, 
+ covariateName = "", 
+ analysisId = 1,
+ conceptId = 1)[!1, ],
+ analysisRef = tibble(analysisId = 1, 
+ analysisName = "",
+ domainId = "",
+ startDay = 1, 
+ endDay = 1, 
+ isBinary = "", 
+ missingMeansZero = "")[!1, ])
+ attr(covariateData, "metaData") <- list(populationSize = 0,
+ cohortIds = cohortIds)
  class(covariateData) <- "CovariateData"
  return(covariateData)
 }
diff --git a/R/DefaultCovariateSettings.R b/R/DefaultCovariateSettings.R
@@ -1,4 +1,4 @@
-# Copyright 2023 Observational Health Data Sciences and Informatics
+# Copyright 2024 Observational Health Data Sciences and Informatics
 #
 # This file is part of FeatureExtraction
 #

diff --git a/R/DefaultTemporalCovariateSettings.R b/R/DefaultTemporalCovariateSettings.R
@@ -1,4 +1,4 @@
-# Copyright 2023 Observational Health Data Sciences and Informatics
+# Copyright 2024 Observational Health Data Sciences and Informatics
 #
 # This file is part of FeatureExtraction
 #

diff --git a/R/DefaultTemporalSequenceCovariateSettings.R b/R/DefaultTemporalSequenceCovariateSettings.R
@@ -1,4 +1,4 @@
-# Copyright 2023 Observational Health Data Sciences and Informatics
+# Copyright 2024 Observational Health Data Sciences and Informatics
 #
 # This file is part of FeatureExtraction
 #

diff --git a/R/DetailedCovariateSettings.R b/R/DetailedCovariateSettings.R
@@ -1,4 +1,4 @@
-# Copyright 2023 Observational Health Data Sciences and Informatics
+# Copyright 2024 Observational Health Data Sciences and Informatics
 #
 # This file is part of FeatureExtraction
 #

diff --git a/R/FeatureExtraction.R b/R/FeatureExtraction.R
@@ -1,6 +1,6 @@
 # @file FeatureExtraction.R
 #
-# Copyright 2023 Observational Health Data Sciences and Informatics
+# Copyright 2024 Observational Health Data Sciences and Informatics
 #
 # This file is part of FeatureExtraction
 #

diff --git a/R/GetCovariates.R b/R/GetCovariates.R
@@ -1,4 +1,4 @@
-# Copyright 2023 Observational Health Data Sciences and Informatics
+# Copyright 2024 Observational Health Data Sciences and Informatics
 #
 # This file is part of FeatureExtraction
 #
@@ -46,7 +46,10 @@
 #' specify both the database and the schema, so for example
 #' 'cdm_instance.dbo'.
 #' @param cohortTableIsTemp Is the cohort table a temp table?
-#' @param cohortId For which cohort ID(s) should covariates be constructed? If set to -1,
+#' @param cohortId DEPRECATED (use cohortIds instead): For which cohort ID(s) should covariates be constructed? If set to -1,
+#' covariates will be constructed for all cohorts in the specified cohort
+#' table.
+#' @param cohortIds For which cohort ID(s) should covariates be constructed? If set to c(-1),
 #' covariates will be constructed for all cohorts in the specified cohort
 #' table.
 #' @param rowIdField The name of the field in the cohort table that is to be used as the
@@ -78,7 +81,7 @@
 #' cohortTable = "cohort",
 #' cohortDatabaseSchema = "main",
 #' cohortTableIsTemp = FALSE,
-#' cohortId = -1,
+#' cohortIds = -1,
 #' rowIdField = "subject_id",
 #' covariateSettings = covSettings,
 #' aggregated = FALSE
@@ -95,6 +98,7 @@ getDbCovariateData <- function(connectionDetails = NULL,
  cohortDatabaseSchema = cdmDatabaseSchema,
  cohortTableIsTemp = FALSE,
  cohortId = -1,
+ cohortIds = c(-1),
  rowIdField = "subject_id",
  covariateSettings,
  aggregated = FALSE) {
@@ -107,6 +111,10 @@ getDbCovariateData <- function(connectionDetails = NULL,
  if (cdmVersion == "4") {
  stop("CDM version 4 is not supported any more")
  }
+ if (!missing(cohortId)) { 
+ warning("cohortId argument has been deprecated, please use cohortIds")
+ cohortIds <- cohortId
+ }
  if (!is.null(connectionDetails)) {
  connection <- DatabaseConnector::connect(connectionDetails)
  on.exit(DatabaseConnector::disconnect(connection))
@@ -120,17 +128,13 @@ getDbCovariateData <- function(connectionDetails = NULL,
  } else {
  cohortDatabaseSchemaTable <- paste(cohortDatabaseSchema, cohortTable, sep = ".")
  }
- sql <- "SELECT cohort_definition_id, COUNT_BIG(*) AS population_size FROM @cohort_database_schema_table {@cohort_id != -1} ? {WHERE cohort_definition_id IN (@cohort_id)} GROUP BY cohort_definition_id;"
- sql <- SqlRender::render(
- sql = sql,
- cohort_database_schema_table = cohortDatabaseSchemaTable,
- cohort_id = cohortId
- )
- sql <- SqlRender::translate(
- sql = sql,
- targetDialect = attr(connection, "dbms"),
- oracleTempSchema = oracleTempSchema
- )
+ sql <- "SELECT cohort_definition_id, COUNT_BIG(*) AS population_size FROM @cohort_database_schema_table {@cohort_ids != -1} ? {WHERE cohort_definition_id IN (@cohort_ids)} GROUP BY cohort_definition_id;"
+ sql <- SqlRender::render(sql = sql,
+ cohort_database_schema_table = cohortDatabaseSchemaTable,
+ cohort_ids = cohortIds)
+ sql <- SqlRender::translate(sql = sql,
+ targetDialect = attr(connection, "dbms"),
+ oracleTempSchema = oracleTempSchema)
  temp <- DatabaseConnector::querySql(connection, sql, snakeCaseToCamelCase = TRUE)
  if (aggregated) {
  populationSize <- temp$populationSize
@@ -139,7 +143,7 @@ getDbCovariateData <- function(connectionDetails = NULL,
  populationSize <- sum(temp$populationSize)
  }
  if (sum(populationSize) == 0) {
- covariateData <- createEmptyCovariateData(cohortId, aggregated, covariateSettings$temporal)
+ covariateData <- createEmptyCovariateData(cohortIds, aggregated, covariateSettings$temporal)
  warning("Population is empty. No covariates were constructed")
  } else {
  if (inherits(covariateSettings, "covariateSettings")) {
@@ -152,17 +156,15 @@ getDbCovariateData <- function(connectionDetails = NULL,
  }
  for (i in 1:length(covariateSettings)) {
  fun <- attr(covariateSettings[[i]], "fun")
- args <- list(
- connection = connection,
- oracleTempSchema = oracleTempSchema,
- cdmDatabaseSchema = cdmDatabaseSchema,
- cohortTable = cohortDatabaseSchemaTable,
- cohortId = cohortId,
- cdmVersion = cdmVersion,
- rowIdField = rowIdField,
- covariateSettings = covariateSettings[[i]],
- aggregated = aggregated
- )
+ args <- list(connection = connection,
+ oracleTempSchema = oracleTempSchema,
+ cdmDatabaseSchema = cdmDatabaseSchema,
+ cohortTable = cohortDatabaseSchemaTable,
+ cohortIds = cohortIds,
+ cdmVersion = cdmVersion,
+ rowIdField = rowIdField,
+ covariateSettings = covariateSettings[[i]],
+ aggregated = aggregated)
  tempCovariateData <- do.call(eval(parse(text = fun)), args)
  if (is.null(covariateData)) {
  covariateData <- tempCovariateData
@@ -197,7 +199,7 @@ getDbCovariateData <- function(connectionDetails = NULL,
  }
  }
  attr(covariateData, "metaData")$populationSize <- populationSize
- attr(covariateData, "metaData")$cohortId <- cohortId
+ attr(covariateData, "metaData")$cohortIds <- cohortIds
  }
  return(covariateData)
 }