Commit: fix fda_drugs: wrong url

Yunuuuu committed Jul 2, 2024 (1 parent: 9a583f9; commit: c71ca52)

Showing 7 changed files with 149 additions and 66 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: faers
Title: R interface for FDA Adverse Event Reporting System
-Version: 1.1.1
+Version: 1.1.2
Authors@R:
c(
person("Yun", "Peng", , "yunyunp96@163.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-2801-3332")),
2 changes: 1 addition & 1 deletion R/athena.R
@@ -72,7 +72,7 @@ athena_read <- function(file) {
athena_file <- function(
url, force, dir = faers_cache_dir("athena"),
arg = rlang::caller_arg(url)) {
-    cache_file(
+    cache_use_or_download(
force = force,
url = url,
prefix = "athena_vocabularies",
67 changes: 57 additions & 10 deletions R/download.R
@@ -8,7 +8,7 @@
#' @param dir The destination directory for any downloads. Defaults to
#' current working dir.
#' @param ... Extra handle options passed to each request
-#' [new_handle][curl::new_handle].
+#' [multi_download][curl::multi_download].
#' @return An atomic character for the path of downloaded files.
#' @examples
#' # you must change `dir`, as the file included in the package is sampled
@@ -34,33 +34,39 @@ faers_download <- function(years, quarters, format = NULL, dir = getwd(), ...) {
}
urls <- build_faers_url(format, years, quarters)
dest_files <- file.path(dir_create2(dir), basename(urls))
-    download_inform(urls, dest_files, handle_opts = list(...))
+    download_inform(urls, dest_files, ...)
}

#' Download utility with informative messages.
#' @return A character path if downloading succeeded; otherwise, stop with an
#' error message.
#' @noRd
-download_inform <- function(urls, file_paths, handle_opts = list()) {
+download_inform <- function(urls, file_paths, ...) {
out <- list(
urls = urls, destfiles = file_paths,
is_success = rep_len(TRUE, length(urls))
)
-    is_existed <- file.exists(file_paths)
-    if (any(is_existed)) {
-        cli::cli_inform("Finding {.val {sum(is_existed)}} file{?s} already downloaded: {.file {basename(file_paths[is_existed])}}") # nolint
+    if (any(is_existed <- file.exists(file_paths))) {
+        cli::cli_inform(paste(
+            "Finding {.val {sum(is_existed)}} file{?s} already",
+            "downloaded: {.file {basename(file_paths[is_existed])}}"
+        ))
urls <- urls[!is_existed]
file_paths <- file_paths[!is_existed]
}
-    if (length(urls)) {
+    if (l <- length(urls)) {
assert_internet()
cli::cli_inform("Downloading {.val {length(urls)}} file{?s}")
if (l == 1L) {
cli::cli_inform("Downloading 1 file from: {.url {urls}}")
} else {
cli::cli_inform("Downloading {.val {l}} files")
}
arg_list <- c(
list(
urls = urls, destfiles = file_paths, resume = FALSE,
progress = interactive(), timeout = Inf
),
-            handle_opts
+            rlang::list2(...)
)
status <- do.call(curl::multi_download, arg_list)
is_success <- is_download_success(status)
@@ -73,7 +79,10 @@ download_inform <- function(urls, file_paths, handle_opts = list()) {
cli::cli_abort(c(
"Cannot download {.val {n_failed_files}} file{?s}",
"i" = "url{?s}: {.url {urls[!is_success]}}",
"!" = "status {cli::qty(n_failed_files)} code{?s}: {.val {status$status_code[!is_success]}}",
"!" = paste(
"status {cli::qty(n_failed_files)} code{?s}:",
"{.val {status[!is_success]}}"
),
x = "error {cli::qty(n_failed_files)} message{?s}: {.val {status$error[!is_success]}}"
))
}
@@ -88,6 +97,44 @@ is_download_success <- function(status, successful_code = c(200L, 206L, 416L)) {
status$status_code %in% successful_code
}

+base_download_inform <- function(urls, file_paths, ...) {
+    out <- file_paths
+    if (any(is_existed <- file.exists(file_paths))) {
+        cli::cli_inform(paste(
+            "Finding {.val {sum(is_existed)}} file{?s} already",
+            "downloaded: {.file {basename(file_paths[is_existed])}}"
+        )) # nolint
+        urls <- urls[!is_existed]
+        file_paths <- file_paths[!is_existed]
+    }
+    if (l <- length(urls)) {
+        assert_internet()
+        if (l == 1L) {
+            cli::cli_inform("Downloading 1 file from: {.url {urls}}")
+        } else {
+            cli::cli_inform("Downloading {.val {l}} files")
+        }
+        status <- utils::download.file(urls,
+            destfile = file_paths, ..., method = "libcurl"
+        )
+        is_success <- status == 0L
+        is_need_deleted <- !is_success & file.exists(file_paths)
+        if (any(is_need_deleted)) file.remove(file_paths[is_need_deleted])
+        if (!all(is_success)) {
+            n_failed_files <- sum(!is_success) # nolint
+            cli::cli_abort(c(
+                "Cannot download {.val {n_failed_files}} file{?s}",
+                "i" = "url{?s}: {.url {urls[!is_success]}}",
+                "!" = paste(
+                    "status {cli::qty(n_failed_files)} code{?s}:",
+                    "{.val {status[!is_success]}}"
+                )
+            ))
+        }
+    }
+    out
+}

build_faers_url <- function(type, years, quarters) {
laers_period <- is_from_laers(years, quarters)
sprintf(
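For context (not part of the diff): extra arguments to the exported `faers_download()` now travel through `rlang::list2(...)` into `curl::multi_download()` as per-request handle options. A minimal usage sketch; the year/quarter/format values are illustrative, and `useragent` and `connecttimeout` are standard curl handle options chosen for the example:

library(faers)
# Extra arguments are collected by rlang::list2(...) and forwarded to
# curl::multi_download(), which applies them to each request handle.
faers_download(
    years = 2004, quarters = "q1", format = "ascii",
    dir = tempdir(),
    useragent = "faers-R-client", connecttimeout = 30L
)
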
49 changes: 34 additions & 15 deletions R/fda_drugs.R
@@ -6,23 +6,27 @@
#' dataset?
#' @param force A boolean value. If set to `TRUE`, it indicates the retrieval of
#' `Drugs@@FDA` data in the FDA directly, bypassing the cache.
+#' @param url A string of the url for `Drugs@@FDA` file. Try to get the link
+#' from site:
+#' <https://www.fda.gov/drugs/drug-approvals-and-databases/drugsfda-data-files>.
#' @return
#' - if `list = TRUE`, an atomic character.
#' - if `list = FALSE`, a [data.table][data.table::data.table].
-#' @seealso
-#' <https://www.fda.gov/drugs/drug-approvals-and-databases/drugsfda-data-files>
#' @examples
#' fda_drugs(list = TRUE)
#' fda_drugs()
#' @export
fda_drugs <- function(pattern = "Products", list = FALSE, force = FALSE) {
fda_drugs <- function(pattern = "Products", url = NULL,
list = FALSE, force = FALSE) {
assert_bool(list)
assert_bool(force)
-    file <- fda_drugs_file(force)
+    assert_string(url, null_ok = TRUE)
+    file <- fda_drugs_file(url, force)
fda_drugs_load(file, pattern = pattern, list = list)
}

fda_drugs_load <- function(file, pattern = "Products", list = FALSE, dir = faers_cache_dir("fdadrugs")) {
fda_drugs_load <- function(file, pattern = "Products",
list = FALSE, dir = faers_cache_dir("fdadrugs")) {
path <- unzip2(file, dir)
if (list) {
list.files(path)
@@ -35,28 +39,43 @@ fda_drugs_load <- function(file, pattern = "Products", list = FALSE, dir = faers
}
}

-fda_drugs_file <- function(force, dir = faers_cache_dir("fdadrugs")) {
-    cache_file(
+fda_drugs_file <- function(url = NULL, force,
+                           dir = faers_cache_dir("fdadrugs"),
+                           arg = rlang::caller_arg(url),
+                           call = rlang::caller_env()) {
+    cache_use_or_download(
force = force,
-        url = fda_drugs_url(),
+        url = fda_drugs_url(url, arg = arg, call = call),
prefix = "fda_drugs_data",
ext = "zip",
name = "Drugs@FDA data",
-        dir = dir
+        dir = dir,
+        method = "base",
+        arg = arg, call = call
)
}

-fda_drugs_url <- function(call = rlang::caller_env()) {
+fda_drugs_url <- function(url = NULL,
+                          arg = rlang::caller_arg(url),
+                          call = rlang::caller_env()) {
+    if (!is.null(url)) return(url) # styler: off
assert_internet(call = call)
url <- sprintf(
"%s/drugs/drug-approvals-and-databases/drugsfda-data-files",
fda_host("www")
)
cli::cli_inform(c(">" = "Reading html: {.url {url}}"))
html <- xml2::read_html(url)
-    node <- rvest::html_element(
-        html, "[data-entity-substitution=media_download]"
-    )
-    cli::cat_line(rvest::html_text(node))
-    paste0(fda_host("www"), rvest::html_attr(node, "href"))
+    node <- rvest::html_element(html, "[data-entity-substitution]")
+    if (inherits(node, "xml_missing")) {
+        # cli::cli_abort(c(
+        #     "Cannot determine the url of {.field Drugs@FDA} file",
+        #     i = "try to provide {.arg {arg}} manually"
+        # ), call = call)
+        href <- "/media/89850/download?attachment"
+    } else {
+        cli::cat_line(rvest::html_text(node))
+        href <- rvest::html_attr(node, "href")
+    }
+    paste0(fda_host("www"), href)
}
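
For context (not part of the diff): with the fallback above, callers can also pin the Drugs@FDA link manually through the new `url` argument. A hedged usage sketch; the full URL below simply joins `fda_host("www")`, i.e. `https://www.fda.gov`, with the fallback href hard-coded in this commit:

library(faers)
# Bypass both the page scraping and the cache by supplying the link directly.
fda_drugs(
    url = "https://www.fda.gov/media/89850/download?attachment",
    force = TRUE
)
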
84 changes: 50 additions & 34 deletions R/utils-file.R
@@ -40,11 +40,10 @@ delete_cache <- function(path, ..., name = NULL) {
}
}

-cache_file <- function(
-    force, url, prefix,
-    ext = NULL, name, dir,
-    arg = rlang::caller_arg(url),
-    call = rlang::caller_env()) {
+cache_use_or_download <- function(force, url, prefix,
+                                  ext = NULL, name, dir, ...,
+                                  arg = rlang::caller_arg(url),
+                                  call = rlang::caller_env()) {
if (!force) {
pattern <- sprintf("^%s_\\d+-\\d+-\\d+", prefix)
if (!is.null(ext)) {
@@ -54,41 +53,56 @@ cache_file <- function(
}
file <- tryCatch(
locate_files(dir, pattern, ignore.case = FALSE),
-            no_file = function(cnd) {
-                force <<- TRUE
-            }
+            no_file = function(cnd) force <<- TRUE
)
}
if (force) {
-        if (is.null(url)) {
-            cli::cli_abort("You must provide {.arg {arg}}", call = call)
-        }
-        file <- cache_download(url, prefix = prefix, ext = ext, dir = dir)
-    } else {
-        date <- as.Date(
-            str_extract(basename(file), "\\d+-\\d+-\\d+"),
-            format = "%Y-%m-%d"
+        cache_download(
+            url = url,
+            prefix = prefix, ext = ext, dir = dir,
+            ...,
+            arg = arg, call = call
        )
-        if (length(file) > 1L) {
-            i <- order(date, decreasing = TRUE)[1L]
-            file <- file[i]
-            date <- date[i]
-        }
-        cli::cli_inform(c(
-            ">" = "Using {name} from cached {.file {file}}",
-            " " = "Snapshot date: {date}"
-        ))
+    } else {
+        cache_use(file, name = name)
+    }
}

+cache_use <- function(file, name) {
+    date <- as.Date(
+        str_extract(basename(file), "\\d+-\\d+-\\d+"),
+        format = "%Y-%m-%d"
+    )
+    if (length(file) > 1L) {
+        i <- order(date, decreasing = TRUE)[1L]
+        file <- file[i]
+        date <- date[i]
+    }
+    cli::cli_inform(c(
+        ">" = "Using {name} from cached {.file {file}}",
+        " " = "Snapshot date: {date}"
+    ))
+    file
+}

-cache_download <- function(url, prefix, ext, dir, call = rlang::caller_env()) {
+cache_download <- function(url, prefix, ext, dir, method = NULL,
+                           ...,
+                           arg = rlang::caller_arg(url),
+                           call = rlang::caller_env()) {
+    if (is.null(url)) {
+        cli::cli_abort("You must provide {.arg {arg}}", call = call)
+    }
    assert_internet(call = call)
+    method <- match.arg(method, c("base", "curl"))
file <- file.path(dir, paste(prefix, Sys.Date(), sep = "_"))
if (!is.null(ext)) {
file <- paste(file, ext, sep = ".")
}
-    download_inform(url, file)
+    if (identical(method, "base")) {
+        base_download_inform(url, file)
+    } else {
+        download_inform(url, file, ...)
+    }
}

# name used: metadata, fdadrugs, rxnorm, athena
@@ -111,7 +125,8 @@ faers_user_cache_dir <- function(create = TRUE) {
}

# file or path utils function --------------
-dir_or_unzip <- function(path, compress_dir, pattern, none_msg, ignore.case = TRUE) {
+dir_or_unzip <- function(path, compress_dir, pattern, none_msg,
+                         ignore.case = TRUE) {
if (dir.exists(path)) {
return(path)
} else if (file.exists(path)) {
@@ -138,14 +153,15 @@ zip2 <- function(zipfile, files, ..., root = getwd()) {
#' Will always add the basename into the compress_dir
#' @noRd
unzip2 <- function(path, compress_dir, ignore.case = TRUE) {
-    compress_dir <- file.path(dir_create2(compress_dir), str_remove(
-        basename(path), "\\.zip$",
-        ignore.case = ignore.case
-    ))
-    if (is.null(utils::unzip(path, exdir = dir_create2(compress_dir), overwrite = TRUE))) {
+    compress_dir <- file.path(
+        dir_create2(compress_dir),
+        str_remove(basename(path), "\\.zip$", ignore.case = ignore.case)
+    )
+    exdir <- dir_create2(compress_dir)
+    if (is.null(utils::unzip(path, exdir = exdir, overwrite = TRUE))) {
        cli::cli_abort("Cannot uncompress {.file {path}}")
    }
-    compress_dir
+    exdir
}

locate_dir <- function(path, pattern = NULL, ignore.case = TRUE) {
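For context (not part of the diff): the cache helpers now split into three steps: `cache_use_or_download()` either reuses the newest dated snapshot via `cache_use()` or delegates to `cache_download()`, which dispatches on the new `method` argument. A sketch of the resulting flow, mirroring the call made by `fda_drugs_file()` above; these are package-internal helpers shown for illustration, and the url value is the commit's fallback link:

# force = FALSE: reuse a cached "fda_drugs_data_YYYY-MM-DD.zip" if present;
# otherwise download. method = "base" routes through base_download_inform()
# and utils::download.file() rather than curl::multi_download().
file <- cache_use_or_download(
    force = FALSE,
    url = "https://www.fda.gov/media/89850/download?attachment",
    prefix = "fda_drugs_data", ext = "zip",
    name = "Drugs@FDA data",
    dir = faers_cache_dir("fdadrugs"),
    method = "base"
)
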
2 changes: 1 addition & 1 deletion man/faers_download.Rd

(generated documentation; diff not rendered)

9 changes: 5 additions & 4 deletions man/fda_drugs.Rd

(generated documentation; diff not rendered)
