Skip to content

Commit

Permalink
improve performance
Browse files Browse the repository at this point in the history
  • Loading branch information
Yunuuuu committed Nov 6, 2023
1 parent 3c3cd17 commit 2e0ccf3
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 19 deletions.
69 changes: 50 additions & 19 deletions R/dedup.R
Original file line number Diff line number Diff line change
Expand Up @@ -143,14 +143,22 @@ dedup_faers_ascii <- function(data, deleted_cases = NULL) {
data$demo[
order(-year, -quarter, -fda_dt, i_f_code, -event_dt)
],
by = "primaryid"
by = "primaryid", cols = c(
"year", "quarter", "caseid", "caseversion",
"fda_dt", "i_f_code", "age_in_years", "gender",
"country_code", "event_dt"
)
)
} else {
out <- unique(
data$demo[!caseid %in% deleted_cases][
order(-year, -quarter, -fda_dt, i_f_code, -event_dt)
],
by = "primaryid"
by = "primaryid", cols = c(
"year", "quarter", "caseid", "caseversion",
"fda_dt", "i_f_code", "age_in_years", "gender",
"country_code", "event_dt"
)
)
}

Expand All @@ -169,29 +177,45 @@ dedup_faers_ascii <- function(data, deleted_cases = NULL) {
# match drug, indi, and ther data.
common_keys <- c("year", "quarter", "primaryid")
cli::cli_alert("merging `drug`, `indi`, `ther`, and `reac` data")
out <- data$drug[order(drug_seq),
list(aligned_drugs = paste0(drugname, collapse = "/")),
by = common_keys
][out, on = common_keys]
out[
data$drug[order(drug_seq),
list(aligned_drugs = paste0(drugname, collapse = "/")),
by = common_keys
],
aligned_drugs := i.aligned_drugs,
on = common_keys
]

# Should we remove unknown indications, or just translate them
# into NA?
# meddra_code: indi_pt
# pt: 10070592 Product used for unknown indication
# llt: 10057097 Drug use for unknown indication
out <- data$indi[order(indi_drug_seq, meddra_code),
list(aligned_indi = paste0(meddra_code, collapse = "/")),
by = common_keys
][out, on = common_keys]
out <- data$ther[order(dsg_drug_seq, start_dt),
list(aligned_start_dt = paste0(start_dt, collapse = "/")),
by = common_keys
][out, on = common_keys]
out[
data$indi[order(indi_drug_seq, meddra_code),
list(aligned_indi = paste0(meddra_code, collapse = "/")),
by = common_keys
],
aligned_indi := i.aligned_indi,
on = common_keys
]
out[
data$ther[order(dsg_drug_seq, start_dt),
list(aligned_start_dt = paste0(start_dt, collapse = "/")),
by = common_keys
],
aligned_start_dt := i.aligned_start_dt,
on = common_keys
]
# meddra_code: pt
out <- data$reac[order(meddra_code),
list(aligned_reac = paste0(meddra_code, collapse = "/")),
by = common_keys
][out, on = common_keys]
out[
data$reac[order(meddra_code),
list(aligned_reac = paste0(meddra_code, collapse = "/")),
by = common_keys
],
aligned_reac := i.aligned_reac,
on = common_keys
]

# consider two cases to be the same if they had a complete match of the
# eight criteria which are gender, age, reporting country, event date, start
Expand Down Expand Up @@ -278,5 +302,12 @@ dedup_faers_ascii <- function(data, deleted_cases = NULL) {
utils::globalVariables(c(
"drug_seq", "drugname", "indi_meddra_code", "start_dt",
"indi_drug_seq", "dsg_drug_seq", "primaryid", "caseversion", "fda_dt", "i_f_code", "event_dt", "year", "caseid", "age_in_years_round",
"meddra_code"
"meddra_code",
paste0(c("", "i."), rep(
c(
"aligned_drugs", "aligned_indi", "aligned_start_dt",
"aligned_reac"
),
each = 2L
))
))
13 changes: 13 additions & 0 deletions tests/testthat/test_standardize.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,22 @@ testthat::test_that("standardize FAERS ascii data works well", {

testthat::test_that("de-duplicating FAERS ascii data works well", {
testthat::expect_error(faers_dedup(data))
# internal code doesn't modify data by reference, and drug_seq matches well
raw_demo <- data.table::copy(data_std$demo)
raw_drug <- data.table::copy(data_std$drug)
raw_indi <- data.table::copy(data_std$indi)
raw_ther <- data.table::copy(data_std$ther)
raw_reac <- data.table::copy(data_std$reac)
testthat::expect_no_error(data_dedup <- faers_dedup(data_std))
testthat::expect_true(data_dedup@deduplication)
testthat::expect_equal(anyDuplicated(faers_primaryid(data_dedup)), 0L)


testthat::expect_identical(data_dedup$demo, raw_demo)
testthat::expect_identical(data_dedup$drug, raw_drug)
testthat::expect_identical(data_dedup$indi, raw_indi)
testthat::expect_identical(data_dedup$ther, raw_ther)
testthat::expect_identical(data_dedup$reac, raw_reac)
# don't introduce absent primaryid
testthat::expect_in(data_dedup$indi$primaryid, data_std$indi$primaryid)
testthat::expect_in(data_dedup$ther$primaryid, data_std$ther$primaryid)
Expand Down

0 comments on commit 2e0ccf3

Please sign in to comment.