Skip to content

Commit

Permalink
use dt_shallow to reduce memory usage
Browse files Browse the repository at this point in the history
  • Loading branch information
Yunuuuu committed Nov 6, 2023
1 parent 4d4ca51 commit e3b0073
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 63 deletions.
5 changes: 4 additions & 1 deletion R/meddra.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ meddra_data <- function(path, add_smq = FALSE) {
version <- meddra_load_version(path)
if (add_smq) {
smq_data <- meddra_load_smq(path)
term_and_smq <- unique(smq_data[, c("smq_code", "term_code")])
term_and_smq <- unique(smq_data,
by = c("smq_code", "term_code"),
cols = character()
)
smq_code <- term_and_smq$smq_code[
meddra_hierarchy_match(
hierarchy,
Expand Down
10 changes: 4 additions & 6 deletions R/merge.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,13 @@ methods::setMethod("faers_merge", "FAERSascii", function(object, fields = NULL,
if (sum(fields %in% c("indi", "ther", "drug")) >= 2L) {
if (any(fields == "indi")) {
if (indi_reference) {
lst$indi$drug_seq <- lst$indi$indi_drug_seq
lst$indi$indi_drug_seq <- NULL
} else {
data.table::setnames(lst$indi, "indi_drug_seq", "drug_seq")
lst$indi <- dt_shallow(lst$indi)
}
data.table::setnames(lst$indi, "indi_drug_seq", "drug_seq")
}
if (any(fields == "ther")) {
lst$ther$drug_seq <- lst$ther$dsg_drug_seq
lst$ther$dsg_drug_seq <- NULL
lst$ther <- dt_shallow(lst$ther)
data.table::setnames(lst$ther, "dsg_drug_seq", "drug_seq")
}
}
Reduce(function(x, y) {
Expand Down
21 changes: 12 additions & 9 deletions R/methods-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,13 @@ methods::setMethod("faers_get", "FAERSascii", function(object, field) {
field <- match.arg(field, faers_ascii_file_fields)
out <- object@data[[field]]
if (object@standardization && any(field == c("indi", "reac"))) {
idx <- out$meddra_hierarchy_idx
cbind(out[, !"meddra_hierarchy_idx"], object@meddra@hierarchy[idx])
} else {
out
.__idx__. <- out$meddra_hierarchy_idx
out <- dt_shallow(out)
out[, meddra_hierarchy_idx := NULL]
out[, names(object@meddra@hierarchy) :=
object@meddra@hierarchy[.__idx__.]]
}
out
})

#######################################################
Expand All @@ -82,15 +84,16 @@ methods::setMethod("faers_mget", "FAERSascii", function(object, fields) {
if (object@standardization) {
ii <- intersect(names(out), c("indi", "reac"))
for (i in ii) {
meddra_hierarchy_idx <- out[[i]]$meddra_hierarchy_idx
out[[i]] <- cbind(
out[[i]][, !"meddra_hierarchy_idx"],
object@meddra@hierarchy[meddra_hierarchy_idx]
)
.__idx__. <- out[[i]]$meddra_hierarchy_idx
out[[i]] <- dt_shallow(out[[i]])
out[[i]][, meddra_hierarchy_idx := NULL]
out[[i]][, names(object@meddra@hierarchy) :=
object@meddra@hierarchy[.__idx__.]]
}
}
out
})
utils::globalVariables(c("meddra_hierarchy_idx"))

#######################################################
#' @export
Expand Down
7 changes: 4 additions & 3 deletions R/signal.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,9 @@ methods::setGeneric("faers_phv_signal", function(object, ...) {
methods::setMethod("faers_phv_signal", "FAERSascii", function(object, ..., phv_signal_params = list()) {
assert_(phv_signal_params, is.list, "a list")
out <- faers_phv_table(object, ...)
cbind(out, do.call(
.__signal__. <- do.call(
phv_signal,
c(out[, .SD, .SDcols = c("a", "b", "c", "d")], phv_signal_params)
))
c(out[, c("a", "b", "c", "d")], phv_signal_params)
)
out[, names(.__signal__.) := .__signal__.][]
})
31 changes: 14 additions & 17 deletions R/standardize.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,20 @@ methods::setMethod("faers_standardize", "FAERSascii", function(object, meddra_pa
# https://stackoverflow.com/questions/70181149/is-a-saved-and-loaded-data-table-with-qs-a-correct-data-table
# fix error: when load a saved FAERS object, don't change by reference
cli::cli_alert("standardize {.field Preferred Term} in indi")
object@data$indi$cleaned_pt <- clean_indi_pt(
object@data$indi$indi_pt, meddra@hierarchy
)
object@data$indi <- cbind(
object@data$indi,
meddra_standardize_pt(object@data$indi$cleaned_pt, meddra@hierarchy)
)
object@data$indi[, cleaned_pt := NULL]
object@data$indi <- dt_shallow(object@data$indi)

meddra_cols <- c("meddra_hierarchy_idx", "meddra_hierarchy_from", "meddra_code", "meddra_pt")
object@data$indi[, (meddra_cols) := meddra_standardize_pt(
clean_indi_pt(indi_pt, meddra@hierarchy), # nolint
meddra@hierarchy
)]

cli::cli_alert("standardize {.field Preferred Term} in reac")
object@data$reac$cleaned_pt <- clean_reac_pt(
object@data$reac$pt, meddra@hierarchy
)
object@data$reac <- cbind(
object@data$reac,
meddra_standardize_pt(object@data$reac$cleaned_pt, meddra@hierarchy)
)
object@data$reac[, cleaned_pt := NULL]
object@data$reac <- dt_shallow(object@data$reac)
object@data$reac[, (meddra_cols) := meddra_standardize_pt(
clean_reac_pt(pt, meddra@hierarchy), # nolint
meddra@hierarchy
)]
object@meddra <- meddra
object@standardization <- TRUE
object
Expand Down Expand Up @@ -219,4 +216,4 @@ faers_standardize_drug <- function(terms, athena = NULL, force = FALSE, exact =
athena_standardize_drug(terms = terms, path = athena, force = force)
}

utils::globalVariables(c("cleaned_pt"))
utils::globalVariables(c("cleaned_pt", "indi_pt", "pt"))
5 changes: 5 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ pkg_nm <- function() {
utils::packageName(topenv(environment()))
}

# https://github.com/Rdatatable/data.table/issues/3214#issuecomment-462490046
dt_shallow <- function(x) {
x[TRUE]
}

assert_internet <- function(call = rlang::caller_env()) {
if (!curl::has_internet()) {
cli::cli_abort("No internet", call = call)
Expand Down
103 changes: 76 additions & 27 deletions tests/testthat/test_standardize.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,19 @@ data <- faers(c(2004, 2017),
dir = internal_file("extdata"),
compress_dir = tempdir()
)
data_std <- faers_standardize(data, "~/Data/MedDRA/MedDRA_26_1_English",
add_smq = TRUE
)

testthat::test_that("standardize FAERS ascii data works well", {
# internal don't modify data by reference
raw_indi <- data.table::copy(data$indi)
raw_reac <- data.table::copy(data$reac)
testthat::expect_no_error(data_std <- faers_standardize(data,
"~/Data/MedDRA/MedDRA_26_1_English",
add_smq = TRUE
))
testthat::expect_identical(data$indi, raw_indi)
testthat::expect_identical(data$reac, raw_reac)

# other details works as expected
testthat::expect_true(data_std@standardization)
testthat::expect_s4_class(data_std@meddra, "MedDRA")
testthat::expect_s4_class(faers_meddra(data_std), "MedDRA")
Expand All @@ -35,36 +43,31 @@ testthat::test_that("standardize FAERS ascii data works well", {
))
})

testthat::test_that("de-duplicating FAERS ascii data works well", {
testthat::expect_error(faers_dedup(data))
# internal don't modify data by reference and drug_seq match well
data_std <- faers_standardize(data,
"~/Data/MedDRA/MedDRA_26_1_English", # nolint
add_smq = TRUE
)

testthat::test_that("`faers_get` for standardizated data works well", {
hierarchy_cols <- names(data_std@meddra@hierarchy)
# internal don't modify data by reference
raw_demo <- data.table::copy(data_std$demo)
raw_drug <- data.table::copy(data_std$drug)
raw_indi <- data.table::copy(data_std$indi)
raw_ther <- data.table::copy(data_std$ther)
raw_reac <- data.table::copy(data_std$reac)
testthat::expect_no_error(data_dedup <- faers_dedup(data_std))
testthat::expect_true(data_dedup@deduplication)
testthat::expect_equal(anyDuplicated(faers_primaryid(data_dedup)), 0L)


testthat::expect_identical(data_dedup$demo, raw_demo)
testthat::expect_identical(data_dedup$drug, raw_drug)
testthat::expect_identical(data_dedup$indi, raw_indi)
testthat::expect_identical(data_dedup$ther, raw_ther)
testthat::expect_identical(data_dedup$reac, raw_reac)
# don't introduce absent primaryid
testthat::expect_in(data_dedup$indi$primaryid, data_std$indi$primaryid)
testthat::expect_in(data_dedup$ther$primaryid, data_std$ther$primaryid)
testthat::expect_in(data_dedup$drug$primaryid, data_std$drug$primaryid)
testthat::expect_in(data_dedup$demo$primaryid, data_std$demo$primaryid)
testthat::expect_in(data_dedup$reac$primaryid, data_std$reac$primaryid)
testthat::expect_in(data_dedup$rpsr$primaryid, data_std$rpsr$primaryid)
testthat::expect_in(data_dedup$outc$primaryid, data_std$outc$primaryid)
})
faers_get(data_std, "demo")
testthat::expect_identical(data_std$demo, raw_demo)
faers_get(data_std, "indi")
testthat::expect_identical(data_std$indi, raw_indi)
faers_get(data_std, "ther")
testthat::expect_identical(data_std$ther, raw_ther)
faers_get(data_std, "drug")
testthat::expect_identical(data_std$drug, raw_drug)
faers_get(data_std, "reac")
testthat::expect_identical(data_std$reac, raw_reac)

testthat::test_that("`faers_get` for standardizated data works well", {
hierarchy_cols <- names(data_std@meddra@hierarchy)
# other details works as expected
testthat::expect_s3_class(faers_get(data_std, "indi"), "data.table")
testthat::expect_s3_class(faers_get(data_std, "reac"), "data.table")
testthat::expect_false(anyNA(faers_get(data_std, "indi")$meddra_pt))
Expand All @@ -81,6 +84,24 @@ testthat::test_that("`faers_get` for standardizated data works well", {

testthat::test_that("`faers_mget` for standardizated data works well", {
hierarchy_cols <- names(data_std@meddra@hierarchy)
# internal don't modify data by reference and drug_seq match well
raw_demo <- data.table::copy(data_std$demo)
raw_drug <- data.table::copy(data_std$drug)
raw_indi <- data.table::copy(data_std$indi)
raw_ther <- data.table::copy(data_std$ther)
raw_reac <- data.table::copy(data_std$reac)
faers_mget(data_std, "demo")
testthat::expect_identical(data_std$demo, raw_demo)
faers_mget(data_std, "indi")
testthat::expect_identical(data_std$indi, raw_indi)
faers_mget(data_std, "ther")
testthat::expect_identical(data_std$ther, raw_ther)
faers_mget(data_std, "drug")
testthat::expect_identical(data_std$drug, raw_drug)
faers_mget(data_std, "reac")
testthat::expect_identical(data_std$reac, raw_reac)

# other details
data_list <- faers_mget(data_std, c("indi", "reac", "demo", "drug"))
testthat::expect_true(is.list(data_list))
testthat::expect_true(all(
Expand Down Expand Up @@ -137,6 +158,34 @@ testthat::test_that("`[` for standardizated data works well", {
})
})

testthat::test_that("de-duplicating FAERS ascii data works well", {
testthat::expect_error(faers_dedup(data))
# internal don't modify data by reference and drug_seq match well
raw_demo <- data.table::copy(data_std$demo)
raw_drug <- data.table::copy(data_std$drug)
raw_indi <- data.table::copy(data_std$indi)
raw_ther <- data.table::copy(data_std$ther)
raw_reac <- data.table::copy(data_std$reac)
testthat::expect_no_error(data_dedup <- faers_dedup(data_std))
testthat::expect_true(data_dedup@deduplication)
testthat::expect_equal(anyDuplicated(faers_primaryid(data_dedup)), 0L)


testthat::expect_identical(data_dedup$demo, raw_demo)
testthat::expect_identical(data_dedup$drug, raw_drug)
testthat::expect_identical(data_dedup$indi, raw_indi)
testthat::expect_identical(data_dedup$ther, raw_ther)
testthat::expect_identical(data_dedup$reac, raw_reac)
# don't introduce absent primaryid
testthat::expect_in(data_dedup$indi$primaryid, data_std$indi$primaryid)
testthat::expect_in(data_dedup$ther$primaryid, data_std$ther$primaryid)
testthat::expect_in(data_dedup$drug$primaryid, data_std$drug$primaryid)
testthat::expect_in(data_dedup$demo$primaryid, data_std$demo$primaryid)
testthat::expect_in(data_dedup$reac$primaryid, data_std$reac$primaryid)
testthat::expect_in(data_dedup$rpsr$primaryid, data_std$rpsr$primaryid)
testthat::expect_in(data_dedup$outc$primaryid, data_std$outc$primaryid)
})

testthat::test_that("`faers_merge` for standardizated data works well", {
hierarchy_cols <- c(
meddra_columns(meddra_hierarchy_fields),
Expand Down
17 changes: 17 additions & 0 deletions tests/testthat/test_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,20 @@ testthat::test_that("utils-file works well", {
testthat::expect_error(locate_dir(dir, "^check_dir"))
testthat::expect_error(locate_dir(dir, "^noneexist_directory$"))
})

testthat::test_that("dt_shallow() works as expected", {
dt1 <- data.table::as.data.table(mtcars)
x <- data.table::copy(names(dt1))
dt2 <- dt_shallow(dt1)
testthat::expect_false(data.table::address(dt1) == data.table::address(dt2))
testthat::expect_identical(
vapply(dt1, rlang::obj_address, character(1L)),
vapply(dt2, rlang::obj_address, character(1L))
)
data.table::setnames(dt2, "cyl", "cyl2")
testthat::expect_equal(names(dt2)[2L], "cyl2")
testthat::expect_equal(names(dt1), x)
dt2[, mpg := NULL]
testthat::expect_equal(names(dt1), x)
testthat::expect_equal(names(dt2)[1L], "cyl2")
})

0 comments on commit e3b0073

Please sign in to comment.