add utils cache_file

Yunuuuu · Dec 16, 2023 · a90bc03 · a90bc03
1 parent 46bfee4
commit a90bc03
Show file tree

Hide file tree

Showing 6 changed files with 109 additions and 53 deletions.
diff --git a/R/fda_drugs.R b/R/fda_drugs.R
@@ -1,72 +1,49 @@
 #' Read and Parse Drugs@@FDA data
 #'
-#' @param use File pattern to use. Must define a file exactly, you can set `list
-#' = TRUE` to see what files can be used.
-#' @param list A boolean value, should it only list files in the Drugs@@FDA
+#' @param pattern File pattern to use. Must define a file exactly, you can set
+#' `list = TRUE` to see what files can be used.
+#' @param list A boolean value, should it only list files in the `Drugs@@FDA`
 #' dataset?
 #' @param force A boolean value. If set to `TRUE`, it indicates the retrieval of
 #' `Drugs@@FDA` data in the FDA directly, bypassing the cache.
 #' @return
 #' - if `list = TRUE`, an atomic character.
-#' - if `list = FALSE`, a [data.table][data.table::data.table]
+#' - if `list = FALSE`, a [data.table][data.table::data.table].
 #' @seealso
 #' <https://www.fda.gov/drugs/drug-approvals-and-databases/drugsfda-data-files>
 #' @examples
 #' fda_drugs(list = TRUE)
 #' fda_drugs()
 #' @export
-fda_drugs <- function(use = "Products", list = FALSE, force = FALSE) {
+fda_drugs <- function(pattern = "Products", list = FALSE, force = FALSE) {
     assert_bool(list)
     assert_bool(force)
-    if (force) {
-        file <- fda_drugs_download(dir = faers_cache_dir("fdadrugs"))
-    } else {
-        file <- fda_drugs_file()
-    }
-    fda_drugs_load(file, use = use, list = list)
+    file <- fda_drugs_file(force)
+    fda_drugs_load(file, pattern = pattern, list = list)
 }
 
-fda_drugs_load <- function(file, use = "Products", list = FALSE, dir = faers_cache_dir("fdadrugs")) {
+fda_drugs_load <- function(file, pattern = "Products", list = FALSE, dir = faers_cache_dir("fdadrugs")) {
     path <- unzip2(file, dir)
     if (list) {
         list.files(path)
     } else {
-        file <- locate_file(path, use, ignore.case = TRUE)
+        assert_string(pattern)
+        file <- locate_file(path, pattern, ignore.case = TRUE)
         # Don't use data.table: error, Stopped early on line
         out <- vroom::vroom(file, show_col_types = FALSE)
         data.table::setDT(out)[]
     }
 }
 
-fda_drugs_file <- function(dir = faers_cache_dir("fdadrugs")) {
-    file <- tryCatch(
-        locate_files(dir, "^fda_drugs_data.*\\.zip", ignore.case = FALSE),
-        no_file = function(cnd) FALSE
+fda_drugs_file <- function(force, dir = faers_cache_dir("fdadrugs")) {
+    cache_file(
+        force = force,
+        url = fda_drugs_url(),
+        prefix = "fda_drugs_data",
+        ext = "zip",
+        name = "Drugs@FDA data",
+        dir = dir
     )
-    if (isFALSE(file)) {
-        file <- fda_drugs_download(dir = dir)
-    } else {
-        date <- as.Date(
-            str_extract(basename(file), "\\d+-\\d+-\\d+"),
-            format = "%Y-%m-%d"
-        )
-        if (length(file) > 1L) {
-            i <- order(date, decreasing = TRUE)[1L]
-            file <- file[i]
-            date <- date[i]
-        }
-        cli::cli_inform(c(
-            ">" = "Using Drugs@FDA Data from cached {.file {file}}",
-            " " = "Snapshot date: {date}"
-        ))
-    }
-    file
-}
-
-fda_drugs_download <- function(dir = faers_cache_dir("fdadrugs"), call = rlang::caller_env()) {
-    assert_internet(call = call)
-    file <- file.path(dir, sprintf("fda_drugs_data_%s.zip", Sys.Date()))
-    download_inform(fda_drugs_url(), file)
 }
 
 fda_drugs_url <- function() {

diff --git a/R/standardize.R b/R/standardize.R
@@ -213,8 +213,8 @@ clean_reac_pt <- function(x, hierarchy) {
 }
 
 ########################################################
-faers_standardize_drug <- function(terms, athena = NULL, force = FALSE, exact = TRUE, approximate = TRUE, search = 2L) {
-    athena_standardize_drug(terms = terms, path = athena, force = force)
+faers_standardize_drug <- function(terms, force = FALSE, url = NULL) {
+    athena_standardize_drug(terms = terms, force = force, url = url)
 }
 
 utils::globalVariables(c("cleaned_pt", "indi_pt", "pt"))
diff --git a/R/utils-file.R b/R/utils-file.R
@@ -1,8 +1,8 @@
 #' Remove caches
 #'
 #' @param caches An atomic character, indicates what caches to remove? Only
-#' `"metadata"`, `"fdadrugs"`, `"rxnorm"`, and `"athena"` can be used. If
-#' `NULL`, all caches will be removed.
+#' `r quote_strings(cached_nms)` can be used. If `NULL`, all caches will be
+#' removed.
 #' @inheritParams base::unlink
 #' @return Path of the deleted directory invisiblely
 #' @examples
@@ -12,7 +12,7 @@ faers_clearcache <- function(caches = NULL, force = FALSE) {
     if (is.null(caches)) {
         paths <- faers_user_cache_dir(create = FALSE)
     } else {
-        assert_inclusive(caches, c("metadata", "fdadrugs", "rxnorm", "athena"))
+        assert_inclusive(caches, cached_nms)
         paths <- faers_cache_dir(caches, create = FALSE)
     }
     for (path in paths) {
@@ -21,9 +21,10 @@ faers_clearcache <- function(caches = NULL, force = FALSE) {
     invisible(paths)
 }
 
+cached_nms <- c("metadata", "fdadrugs", "athena")
+
 delete_cache <- function(path, ..., name = NULL) {
     if (dir.exists(path)) {
-        # Not deleting a non-existent file is not a failure
         if (unlink(path, recursive = TRUE, ...)) {
             cli::cli_warn("Cannot remove {.path {path}}")
         } else {
@@ -39,6 +40,57 @@ delete_cache <- function(path, ..., name = NULL) {
     }
 }
 
+cache_file <- function(
+    force, url, prefix,
+    ext = NULL, name, dir,
+    arg = rlang::caller_arg(url),
+    call = rlang::caller_env()) {
+    if (!force) {
+        pattern <- sprintf("^%s_\\d+-\\d+-\\d+", prefix)
+        if (!is.null(ext)) {
+            pattern <- paste0(pattern, "\\.", ext, "$")
+        } else {
+            pattern <- paste0(pattern, "$")
+        }
+        file <- tryCatch(
+            locate_files(dir, pattern, ignore.case = FALSE),
+            no_file = function(cnd) {
+                force <<- TRUE
+            }
+        )
+    }
+    if (force) {
+        if (is.null(url)) {
+            cli::cli_abort("You must provide {.arg {arg}}", call = call)
+        }
+        file <- cache_download(url, prefix = prefix, ext = ext, dir = dir)
+    } else {
+        date <- as.Date(
+            str_extract(basename(file), "\\d+-\\d+-\\d+"),
+            format = "%Y-%m-%d"
+        )
+        if (length(file) > 1L) {
+            i <- order(date, decreasing = TRUE)[1L]
+            file <- file[i]
+            date <- date[i]
+        }
+        cli::cli_inform(c(
+            ">" = "Using {name} from cached {.file {file}}",
+            " " = "Snapshot date: {date}"
+        ))
+    }
+    file
+}
+
+cache_download <- function(url, prefix, ext, dir, call = rlang::caller_env()) {
+    assert_internet(call = call)
+    file <- file.path(dir, paste(prefix, Sys.Date(), sep = "_"))
+    if (!is.null(ext)) {
+        file <- paste(file, ext, sep = ".")
+    }
+    download_inform(url, file)
+}
+
 # name used: metadata, fdadrugs, rxnorm, athena
 faers_cache_dir <- function(name, create = TRUE) {
     path <- file.path(faers_user_cache_dir(create = create), name)
@@ -147,6 +199,24 @@ locate_files <- function(path, pattern = NULL, ignore.case = TRUE) {
     files
 }
 
+use_files <- function(dir, use, ext = NULL) {
+    if (!is.null(ext)) {
+        pattern <- sprintf("\\.%s$", ext)
+    } else {
+        pattern <- NULL
+    }
+    files <- locate_files(dir, pattern)
+    ids <- tolower(path_ext_remove(basename(files)))
+    use <- as.character(use)
+    idx <- data.table::chmatch(use, ids)
+    if (anyNA(idx)) {
+        cli::cli_abort(sprintf("Cannot find %s", oxford_comma(use[is.na(idx)])))
+    }
+    files <- files[idx]
+    names(files) <- ids[idx]
+    files
+}
+
 dir_create2 <- function(dir, ...) {
     if (!dir.exists(dir)) {
         if (!dir.create(dir, showWarnings = FALSE, ...)) {
@@ -159,3 +229,7 @@ dir_create2 <- function(dir, ...) {
 internal_file <- function(...) {
     system.file(..., package = pkg_nm(), mustWork = TRUE)
 }
+
+path_ext_remove <- function(x) {
+    sub("([^.]+)\\.[[:alnum:]]+$", "\\1", x)
+}
diff --git a/R/utils.R b/R/utils.R
@@ -4,6 +4,10 @@ pkg_nm <- function() {
     utils::packageName(topenv(environment()))
 }
 
+quote_strings <- function(x, ...) {
+    oxford_comma(sprintf('"%s"', x), ...)
+}
+
 # https://github.com/Rdatatable/data.table/issues/3214#issuecomment-462490046
 dt_shallow <- function(x) {
     x[TRUE]

diff --git a/man/faers_clearcache.Rd b/man/faers_clearcache.Rd
diff --git a/man/fda_drugs.Rd b/man/fda_drugs.Rd