Commit: fix fda_drugs: wrong url

Yunuuuu committed Jul 2, 2024 (1 parent: 9a583f9; commit: c71ca52)

Showing 7 changed files with 149 additions and 66 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: faers
Title: R interface for FDA Adverse Event Reporting System
-Version: 1.1.1
+Version: 1.1.2
Authors@R:
c(
person("Yun", "Peng", , "yunyunp96@163.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-2801-3332")),
2 changes: 1 addition & 1 deletion R/athena.R
@@ -72,7 +72,7 @@ athena_read <- function(file) {
athena_file <- function(
url, force, dir = faers_cache_dir("athena"),
arg = rlang::caller_arg(url)) {
-    cache_file(
+    cache_use_or_download(
force = force,
url = url,
prefix = "athena_vocabularies",
67 changes: 57 additions & 10 deletions R/download.R
@@ -8,7 +8,7 @@
#' @param dir The destination directory for any downloads. Defaults to
#' current working dir.
#' @param ... Extra handle options passed to each request
-#' [new_handle][curl::new_handle].
+#' [multi_download][curl::multi_download].
#' @return An atomic character for the path of downloaded files.
#' @examples
#' # you must change `dir`, as the file included in the package is sampled
@@ -34,33 +34,39 @@ faers_download <- function(years, quarters, format = NULL, dir = getwd(), ...) {
}
urls <- build_faers_url(format, years, quarters)
dest_files <- file.path(dir_create2(dir), basename(urls))
-    download_inform(urls, dest_files, handle_opts = list(...))
+    download_inform(urls, dest_files, ...)
}

#' Download utility with informative messages.
#' @return A character path if downloading succeeded; otherwise, stop with an
#' error message.
#' @noRd
-download_inform <- function(urls, file_paths, handle_opts = list()) {
+download_inform <- function(urls, file_paths, ...) {
out <- list(
urls = urls, destfiles = file_paths,
is_success = rep_len(TRUE, length(urls))
)
-    is_existed <- file.exists(file_paths)
-    if (any(is_existed)) {
-        cli::cli_inform("Finding {.val {sum(is_existed)}} file{?s} already downloaded: {.file {basename(file_paths[is_existed])}}") # nolint
+    if (any(is_existed <- file.exists(file_paths))) {
+        cli::cli_inform(paste(
+            "Finding {.val {sum(is_existed)}} file{?s} already",
+            "downloaded: {.file {basename(file_paths[is_existed])}}"
+        ))
urls <- urls[!is_existed]
file_paths <- file_paths[!is_existed]
}
-    if (length(urls)) {
+    if (l <- length(urls)) {
assert_internet()
cli::cli_inform("Downloading {.val {length(urls)}} file{?s}")
if (l == 1L) {
cli::cli_inform("Downloading 1 file from: {.url {urls}}")
} else {
cli::cli_inform("Downloading {.val {l}} files")
}
arg_list <- c(
list(
urls = urls, destfiles = file_paths, resume = FALSE,
progress = interactive(), timeout = Inf
),
-            handle_opts
+            rlang::list2(...)
)
status <- do.call(curl::multi_download, arg_list)
is_success <- is_download_success(status)
@@ -73,7 +79,10 @@ download_inform <- function(urls, file_paths, handle_opts = list()) {
cli::cli_abort(c(
"Cannot download {.val {n_failed_files}} file{?s}",
"i" = "url{?s}: {.url {urls[!is_success]}}",
"!" = "status {cli::qty(n_failed_files)} code{?s}: {.val {status$status_code[!is_success]}}",
"!" = paste(
"status {cli::qty(n_failed_files)} code{?s}:",
"{.val {status[!is_success]}}"
),
x = "error {cli::qty(n_failed_files)} message{?s}: {.val {status$error[!is_success]}}"
))
}
@@ -88,6 +97,44 @@ is_download_success <- function(status, successful_code = c(200L, 206L, 416L)) {
status$status_code %in% successful_code
}

+base_download_inform <- function(urls, file_paths, ...) {
+    out <- file_paths
+    if (any(is_existed <- file.exists(file_paths))) {
+        cli::cli_inform(paste(
+            "Finding {.val {sum(is_existed)}} file{?s} already",
+            "downloaded: {.file {basename(file_paths[is_existed])}}"
+        )) # nolint
+        urls <- urls[!is_existed]
+        file_paths <- file_paths[!is_existed]
+    }
+    if (l <- length(urls)) {
+        assert_internet()
+        if (l == 1L) {
+            cli::cli_inform("Downloading 1 file from: {.url {urls}}")
+        } else {
+            cli::cli_inform("Downloading {.val {l}} files")
+        }
+        status <- utils::download.file(urls,
+            destfile = file_paths, ..., method = "libcurl"
+        )
+        is_success <- status == 0L
+        is_need_deleted <- !is_success & file.exists(file_paths)
+        if (any(is_need_deleted)) file.remove(file_paths[is_need_deleted])
+        if (!all(is_success)) {
+            n_failed_files <- sum(!is_success) # nolint
+            cli::cli_abort(c(
+                "Cannot download {.val {n_failed_files}} file{?s}",
+                "i" = "url{?s}: {.url {urls[!is_success]}}",
+                "!" = paste(
+                    "status {cli::qty(n_failed_files)} code{?s}:",
+                    "{.val {status[!is_success]}}"
+                )
+            ))
+        }
+    }
+    out
+}

build_faers_url <- function(type, years, quarters) {
laers_period <- is_from_laers(years, quarters)
sprintf(
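For context (not part of the diff): extra arguments to the exported `faers_download()` now travel through `rlang::list2(...)` into `curl::multi_download()` as per-request handle options. A minimal usage sketch; the year/quarter/format values are illustrative, and `useragent` and `connecttimeout` are standard curl handle options chosen for the example:

library(faers)
# Extra arguments are collected by rlang::list2(...) and forwarded to
# curl::multi_download(), which applies them to each request handle.
faers_download(
    years = 2004, quarters = "q1", format = "ascii",
    dir = tempdir(),
    useragent = "faers-R-client", connecttimeout = 30L
)
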
49 changes: 34 additions & 15 deletions R/fda_drugs.R
@@ -6,23 +6,27 @@
#' dataset?
#' @param force A boolean value. If set to `TRUE`, it indicates the retrieval of
#' `Drugs@@FDA` data in the FDA directly, bypassing the cache.
+#' @param url A string of the url for `Drugs@@FDA` file. Try to get the link
+#' from site:
+#' <https://www.fda.gov/drugs/drug-approvals-and-databases/drugsfda-data-files>.
#' @return
#' - if `list = TRUE`, an atomic character.
#' - if `list = FALSE`, a [data.table][data.table::data.table].
-#' @seealso
-#' <https://www.fda.gov/drugs/drug-approvals-and-databases/drugsfda-data-files>
#' @examples
#' fda_drugs(list = TRUE)
#' fda_drugs()
#' @export
fda_drugs <- function(pattern = "Products", list = FALSE, force = FALSE) {
fda_drugs <- function(pattern = "Products", url = NULL,
list = FALSE, force = FALSE) {
assert_bool(list)
assert_bool(force)
-    file <- fda_drugs_file(force)
+    assert_string(url, null_ok = TRUE)
+    file <- fda_drugs_file(url, force)
fda_drugs_load(file, pattern = pattern, list = list)
}

fda_drugs_load <- function(file, pattern = "Products", list = FALSE, dir = faers_cache_dir("fdadrugs")) {
fda_drugs_load <- function(file, pattern = "Products",
list = FALSE, dir = faers_cache_dir("fdadrugs")) {
path <- unzip2(file, dir)
if (list) {
list.files(path)
@@ -35,28 +39,43 @@ fda_drugs_load <- function(file, pattern = "Products", list = FALSE, dir = faers
}
}

-fda_drugs_file <- function(force, dir = faers_cache_dir("fdadrugs")) {
-    cache_file(
+fda_drugs_file <- function(url = NULL, force,
+                           dir = faers_cache_dir("fdadrugs"),
+                           arg = rlang::caller_arg(url),
+                           call = rlang::caller_env()) {
+    cache_use_or_download(
force = force,
-        url = fda_drugs_url(),
+        url = fda_drugs_url(url, arg = arg, call = call),
prefix = "fda_drugs_data",
ext = "zip",
name = "Drugs@FDA data",
-        dir = dir
+        dir = dir,
+        method = "base",
+        arg = arg, call = call
)
}

-fda_drugs_url <- function(call = rlang::caller_env()) {
+fda_drugs_url <- function(url = NULL,
+                          arg = rlang::caller_arg(url),
+                          call = rlang::caller_env()) {
+    if (!is.null(url)) return(url) # styler: off
assert_internet(call = call)
url <- sprintf(
"%s/drugs/drug-approvals-and-databases/drugsfda-data-files",
fda_host("www")
)
cli::cli_inform(c(">" = "Reading html: {.url {url}}"))
html <- xml2::read_html(url)
-    node <- rvest::html_element(
-        html, "[data-entity-substitution=media_download]"
-    )
-    cli::cat_line(rvest::html_text(node))
-    paste0(fda_host("www"), rvest::html_attr(node, "href"))
+    node <- rvest::html_element(html, "[data-entity-substitution]")
+    if (inherits(node, "xml_missing")) {
+        # cli::cli_abort(c(
+        #     "Cannot determine the url of {.field Drugs@FDA} file",
+        #     i = "try to provide {.arg {arg}} manually"
+        # ), call = call)
+        href <- "/media/89850/download?attachment"
+    } else {
+        cli::cat_line(rvest::html_text(node))
+        href <- rvest::html_attr(node, "href")
+    }
+    paste0(fda_host("www"), href)
}
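
For context (not part of the diff): with the fallback above, callers can also pin the Drugs@FDA link manually through the new `url` argument. A hedged usage sketch; the full URL below simply joins `fda_host("www")`, i.e. `https://www.fda.gov`, with the fallback href hard-coded in this commit:

library(faers)
# Bypass both the page scraping and the cache by supplying the link directly.
fda_drugs(
    url = "https://www.fda.gov/media/89850/download?attachment",
    force = TRUE
)
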
84 changes: 50 additions & 34 deletions R/utils-file.R
@@ -40,11 +40,10 @@ delete_cache <- function(path, ..., name = NULL) {
}
}

-cache_file <- function(
-    force, url, prefix,
-    ext = NULL, name, dir,
-    arg = rlang::caller_arg(url),
-    call = rlang::caller_env()) {
+cache_use_or_download <- function(force, url, prefix,
+                                  ext = NULL, name, dir, ...,
+                                  arg = rlang::caller_arg(url),
+                                  call = rlang::caller_env()) {
if (!force) {
pattern <- sprintf("^%s_\\d+-\\d+-\\d+", prefix)
if (!is.null(ext)) {
@@ -54,41 +53,56 @@ cache_file <- function(
}
file <- tryCatch(
locate_files(dir, pattern, ignore.case = FALSE),
-            no_file = function(cnd) {
-                force <<- TRUE
-            }
+            no_file = function(cnd) force <<- TRUE
)
}
if (force) {
-        if (is.null(url)) {
-            cli::cli_abort("You must provide {.arg {arg}}", call = call)
-        }
-        file <- cache_download(url, prefix = prefix, ext = ext, dir = dir)
-    } else {
-        date <- as.Date(
-            str_extract(basename(file), "\\d+-\\d+-\\d+"),
-            format = "%Y-%m-%d"
+        cache_download(
+            url = url,
+            prefix = prefix, ext = ext, dir = dir,
+            ...,
+            arg = arg, call = call
        )
-        if (length(file) > 1L) {
-            i <- order(date, decreasing = TRUE)[1L]
-            file <- file[i]
-            date <- date[i]
-        }
-        cli::cli_inform(c(
-            ">" = "Using {name} from cached {.file {file}}",
-            " " = "Snapshot date: {date}"
-        ))
+    } else {
+        cache_use(file, name = name)
+    }
}

+cache_use <- function(file, name) {
+    date <- as.Date(
+        str_extract(basename(file), "\\d+-\\d+-\\d+"),
+        format = "%Y-%m-%d"
+    )
+    if (length(file) > 1L) {
+        i <- order(date, decreasing = TRUE)[1L]
+        file <- file[i]
+        date <- date[i]
+    }
+    cli::cli_inform(c(
+        ">" = "Using {name} from cached {.file {file}}",
+        " " = "Snapshot date: {date}"
+    ))
+    file
+}

-cache_download <- function(url, prefix, ext, dir, call = rlang::caller_env()) {
+cache_download <- function(url, prefix, ext, dir, method = NULL,
+                           ...,
+                           arg = rlang::caller_arg(url),
+                           call = rlang::caller_env()) {
+    if (is.null(url)) {
+        cli::cli_abort("You must provide {.arg {arg}}", call = call)
+    }
    assert_internet(call = call)
+    method <- match.arg(method, c("base", "curl"))
file <- file.path(dir, paste(prefix, Sys.Date(), sep = "_"))
if (!is.null(ext)) {
file <- paste(file, ext, sep = ".")
}
-    download_inform(url, file)
+    if (identical(method, "base")) {
+        base_download_inform(url, file)
+    } else {
+        download_inform(url, file, ...)
+    }
}

# name used: metadata, fdadrugs, rxnorm, athena
@@ -111,7 +125,8 @@ faers_user_cache_dir <- function(create = TRUE) {
}

# file or path utils function --------------
-dir_or_unzip <- function(path, compress_dir, pattern, none_msg, ignore.case = TRUE) {
+dir_or_unzip <- function(path, compress_dir, pattern, none_msg,
+                         ignore.case = TRUE) {
if (dir.exists(path)) {
return(path)
} else if (file.exists(path)) {
@@ -138,14 +153,15 @@ zip2 <- function(zipfile, files, ..., root = getwd()) {
#' Will always add the basename into the compress_dir
#' @noRd
unzip2 <- function(path, compress_dir, ignore.case = TRUE) {
-    compress_dir <- file.path(dir_create2(compress_dir), str_remove(
-        basename(path), "\\.zip$",
-        ignore.case = ignore.case
-    ))
-    if (is.null(utils::unzip(path, exdir = dir_create2(compress_dir), overwrite = TRUE))) {
+    compress_dir <- file.path(
+        dir_create2(compress_dir),
+        str_remove(basename(path), "\\.zip$", ignore.case = ignore.case)
+    )
+    exdir <- dir_create2(compress_dir)
+    if (is.null(utils::unzip(path, exdir = exdir, overwrite = TRUE))) {
        cli::cli_abort("Cannot uncompress {.file {path}}")
    }
-    compress_dir
+    exdir
}

locate_dir <- function(path, pattern = NULL, ignore.case = TRUE) {
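For context (not part of the diff): the cache helpers now split into three steps: `cache_use_or_download()` either reuses the newest dated snapshot via `cache_use()` or delegates to `cache_download()`, which dispatches on the new `method` argument. A sketch of the resulting flow, mirroring the call made by `fda_drugs_file()` above; these are package-internal helpers shown for illustration, and the url value is the commit's fallback link:

# force = FALSE: reuse a cached "fda_drugs_data_YYYY-MM-DD.zip" if present;
# otherwise download. method = "base" routes through base_download_inform()
# and utils::download.file() rather than curl::multi_download().
file <- cache_use_or_download(
    force = FALSE,
    url = "https://www.fda.gov/media/89850/download?attachment",
    prefix = "fda_drugs_data", ext = "zip",
    name = "Drugs@FDA data",
    dir = faers_cache_dir("fdadrugs"),
    method = "base"
)
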
2 changes: 1 addition & 1 deletion man/faers_download.Rd

(generated documentation; diff not rendered)

9 changes: 5 additions & 4 deletions man/fda_drugs.Rd

(generated documentation; diff not rendered)
