improve performance

Yunuuuu · Nov 6, 2023 · 2e0ccf3 · 2e0ccf3
1 parent 3c3cd17
commit 2e0ccf3
Show file tree

Hide file tree

Showing 2 changed files with 63 additions and 19 deletions.
diff --git a/R/dedup.R b/R/dedup.R
@@ -143,14 +143,22 @@ dedup_faers_ascii <- function(data, deleted_cases = NULL) {
  data$demo[
  order(-year, -quarter, -fda_dt, i_f_code, -event_dt)
  ],
- by = "primaryid"
+ by = "primaryid", cols = c(
+ "year", "quarter", "caseid", "caseversion",
+ "fda_dt", "i_f_code", "age_in_years", "gender",
+ "country_code", "event_dt"
+ )
  )
  } else {
  out <- unique(
  data$demo[!caseid %in% deleted_cases][
  order(-year, -quarter, -fda_dt, i_f_code, -event_dt)
  ],
- by = "primaryid"
+ by = "primaryid", cols = c(
+ "year", "quarter", "caseid", "caseversion",
+ "fda_dt", "i_f_code", "age_in_years", "gender",
+ "country_code", "event_dt"
+ )
  )
  }
 
@@ -169,29 +177,45 @@ dedup_faers_ascii <- function(data, deleted_cases = NULL) {
  # match drug, indi, and ther data.
  common_keys <- c("year", "quarter", "primaryid")
  cli::cli_alert("merging `drug`, `indi`, `ther`, and `reac` data")
- out <- data$drug[order(drug_seq),
- list(aligned_drugs = paste0(drugname, collapse = "/")),
- by = common_keys
- ][out, on = common_keys]
+ out[
+ data$drug[order(drug_seq),
+ list(aligned_drugs = paste0(drugname, collapse = "/")),
+ by = common_keys
+ ],
+ aligned_drugs := i.aligned_drugs,
+ on = common_keys
+ ]
 
  # should we remove unknown indications or just translate unknown indications
  # into NA ?
  # meddra_code: indi_pt
  # pt: 10070592 Product used for unknown indication
  # llt: 10057097 Drug use for unknown indication
- out <- data$indi[order(indi_drug_seq, meddra_code),
- list(aligned_indi = paste0(meddra_code, collapse = "/")),
- by = common_keys
- ][out, on = common_keys]
- out <- data$ther[order(dsg_drug_seq, start_dt),
- list(aligned_start_dt = paste0(start_dt, collapse = "/")),
- by = common_keys
- ][out, on = common_keys]
+ out[
+ data$indi[order(indi_drug_seq, meddra_code),
+ list(aligned_indi = paste0(meddra_code, collapse = "/")),
+ by = common_keys
+ ],
+ aligned_indi := i.aligned_indi,
+ on = common_keys
+ ]
+ out[
+ data$ther[order(dsg_drug_seq, start_dt),
+ list(aligned_start_dt = paste0(start_dt, collapse = "/")),
+ by = common_keys
+ ],
+ aligned_start_dt := i.aligned_start_dt,
+ on = common_keys
+ ]
  # meddra_code: pt
- out <- data$reac[order(meddra_code),
- list(aligned_reac = paste0(meddra_code, collapse = "/")),
- by = common_keys
- ][out, on = common_keys]
+ out[
+ data$reac[order(meddra_code),
+ list(aligned_reac = paste0(meddra_code, collapse = "/")),
+ by = common_keys
+ ],
+ aligned_reac := i.aligned_reac,
+ on = common_keys
+ ]
 
  # consider two cases to be the same if they had a complete match of the
  # eight criteria which are gender, age, reporting country, event date, start
@@ -278,5 +302,12 @@ dedup_faers_ascii <- function(data, deleted_cases = NULL) {
 utils::globalVariables(c(
  "drug_seq", "drugname", "indi_meddra_code", "start_dt",
  "indi_drug_seq", "dsg_drug_seq", "primaryid", "caseversion", "fda_dt", "i_f_code", "event_dt", "year", "caseid", "age_in_years_round",
- "meddra_code"
+ "meddra_code",
+ paste0(c("", "i."), rep(
+ c(
+ "aligned_drugs", "aligned_indi", "aligned_start_dt",
+ "aligned_reac"
+ ),
+ each = 2L
+ ))
 ))
diff --git a/tests/testthat/test_standardize.R b/tests/testthat/test_standardize.R
@@ -37,9 +37,22 @@ testthat::test_that("standardize FAERS ascii data works well", {
 
 testthat::test_that("de-duplicating FAERS ascii data works well", {
  testthat::expect_error(faers_dedup(data))
+ # internals must not modify the input data by reference, and drug_seq must match well
+ raw_demo <- data.table::copy(data_std$demo)
+ raw_drug <- data.table::copy(data_std$drug)
+ raw_indi <- data.table::copy(data_std$indi)
+ raw_ther <- data.table::copy(data_std$ther)
+ raw_reac <- data.table::copy(data_std$reac)
  testthat::expect_no_error(data_dedup <- faers_dedup(data_std))
  testthat::expect_true(data_dedup@deduplication)
  testthat::expect_equal(anyDuplicated(faers_primaryid(data_dedup)), 0L)
+
+ # the input must be left untouched: compare `data_std` (not the deduplicated result) with the snapshots
+ testthat::expect_identical(data_std$demo, raw_demo)
+ testthat::expect_identical(data_std$drug, raw_drug)
+ testthat::expect_identical(data_std$indi, raw_indi)
+ testthat::expect_identical(data_std$ther, raw_ther)
+ testthat::expect_identical(data_std$reac, raw_reac)
  # don't introduce absent primaryid
  testthat::expect_in(data_dedup$indi$primaryid, data_std$indi$primaryid)
  testthat::expect_in(data_dedup$ther$primaryid, data_std$ther$primaryid)