Skip to content

Commit

Permalink
update get_dataset() etc with explicit use_cache()
Browse files Browse the repository at this point in the history
- additional documentation updates
  • Loading branch information
mtmorgan committed Oct 15, 2024
1 parent af5a490 commit d8ae3ba
Show file tree
Hide file tree
Showing 30 changed files with 135 additions and 127 deletions.
22 changes: 13 additions & 9 deletions R/get_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#' @template version
#' @template envvars
#' @template dots
#' @param use_cache one of `"disk"`, `"session"`, or `"none"`, describing how datasets are cached to reduce network traffic. See \code{\link{cache_dataset}} for details.
#' @return A list of class \dQuote{dataverse_dataset} or a list of a form dependent
#' on the specific metadata block retrieved. \code{dataset_files} returns a list of
#' objects of class \dQuote{dataverse_file}.
Expand All @@ -45,15 +46,16 @@ get_dataset <- function(
version = ":latest",
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...
...,
use_cache = Sys.getenv("DATAVERSE_USE_CACHE", cache_dataset(version))
) {
dataset <- dataset_id(dataset, key = key, server = server, ...)
dataset <- dataset_id(dataset, key = key, server = server, ..., use_cache = use_cache)
if (!is.null(version)) {
u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version)
} else {
u <- paste0(api_url(server), "datasets/", dataset)
}
r <- api_get(u, ..., key = key)
r <- api_get(u, ..., key = key, use_cache = use_cache)
parse_dataset(r)
}

Expand All @@ -69,16 +71,17 @@ dataset_metadata <- function(
block = "citation",
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...
...,
use_cache = Sys.getenv("DATAVERSE_USE_CACHE", cache_dataset(version))
) {
dataset <- dataset_id(dataset, key = key, server = server, ...)
dataset <- dataset_id(dataset, key = key, server = server, ..., use_cache = use_cache)
if (!is.null(block)) {
u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version, "/metadata/", block)
} else {
u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version, "/metadata")
}

r <- api_get(u, ..., key = key)
r <- api_get(u, ..., key = key, use_cache = use_cache)
jsonlite::fromJSON(r)[["data"]]
}

Expand All @@ -89,11 +92,12 @@ dataset_files <- function(
version = ":latest",
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...
...,
use_cache = Sys.getenv("DATAVERSE_USE_CACHE", cache_dataset(version))
) {
dataset <- dataset_id(dataset, key = key, server = server, ...)
dataset <- dataset_id(dataset, key = key, server = server, ..., use_cache = use_cache)
u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version, "/files")
r <- api_get(u, ..., key = key)
r <- api_get(u, ..., key = key, use_cache = use_cache)
out <- jsonlite::fromJSON(r, simplifyDataFrame = FALSE)$data
structure(lapply(out, `class<-`, "dataverse_file"))
}
33 changes: 13 additions & 20 deletions R/onload.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,19 @@
#'
#' @importFrom memoise memoise
.onLoad <- function(libname, pkgname) {
# a <- Sys.getenv("DATAVERSE_SERVER")
# if(a == "") {
# Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
# }
##
## 'memoise' httr::GET calls
##

## implement API disk cache via 'memoise'
cache_directory <- file.path(
tools::R_user_dir(pkgname, "cache"),
"api_cache"
)
get <- api_get_impl
if (!dir.exists(cache_directory)) {
status <- dir.create(cache_directory, recursive = TRUE)
if (!status)
warning("'dataverse' failed to create API cache")
}
if (dir.exists(cache_directory)) {
# disk cache with max age 30 days
cache <- cache_disk(cache_directory, max_age = 60 * 60 * 24 * 30)
get <- memoise(get, cache = cache)
## API session cache
api_get_session_cache <<- memoise(api_get_impl)

## API disk cache
cache_path <- cache_path()
if (dir.exists(cache_path)) {
# disk cache, no age or size limits
cache <- cache_disk(cache_path)
get_disk <- memoise(api_get_impl, cache = cache)
}
api_get_memoized <<- get
api_get_disk_cache <<- get_disk
}
37 changes: 22 additions & 15 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,12 @@ dataset_id.character <- function(x, key = Sys.getenv("DATAVERSE_KEY"), server =
x <- prepend_doi(x)
u <- paste0(api_url(server), "datasets/:persistentId?persistentId=", x)
r <- tryCatch({
api_get(u, ..., key = key)
api_get(u, ..., key = key)
}, error = function(e) {
stop("Could not retrieve Dataset ID from persistent identifier!")
stop(
"Could not retrieve Dataset ID from persistent identifier! ",
conditionMessage(e)
)
})
jsonlite::fromJSON(r)[["data"]][["id"]]
}
Expand Down Expand Up @@ -206,21 +209,23 @@ api_url <- function(server = Sys.getenv("DATAVERSE_SERVER"), prefix = "api/") {
}

## common httr::GET() uses
#' @importFrom checkmate assert_character assert_logical
api_get <- function(url, ..., key = NULL, as = "text", use_cache = as.logical(Sys.getenv("DATAVERSE_USE_CACHE", TRUE))) {
assert_character(url, any.missing = FALSE, len = 1L, null.ok = TRUE)
assert_character(key, any.missing = FALSE, len = 1L, null.ok = TRUE)
assert_character(as, any.missing = FALSE, len = 1L, null.ok = TRUE)
assert_logical(use_cache, any.missing = FALSE, len = 1L)
if (use_cache) {
get <- api_get_memoized
} else {
get <- api_get_impl
}
#' @importFrom checkmate assert_string
api_get <- function(url, ..., key = NULL, as = "text", use_cache = Sys.getenv("DATAVERSE_USE_CACHE", "session")) {
assert_string(url)
assert_string(key, null.ok = TRUE)
assert_string(as, null.ok = TRUE)
assert_use_cache(use_cache)
get <- switch(
use_cache,
"none" = api_get_impl,
"session" = api_get_session_cache,
"disk" = api_get_disk_cache,
stop("unknown value for 'use_cache'")
)
get(url, ..., key = key, as = as)
}

## cache implemented via memoization; memoized function defined in
## cache implemented via memoization; memoized functions defined in
## .onLoad()
api_get_impl <- function(url, ..., key = NULL, as = "text") {
if (!is.null(key))
Expand All @@ -230,7 +235,9 @@ api_get_impl <- function(url, ..., key = NULL, as = "text") {
httr::content(r, as = as, encoding = "UTF-8")
}

api_get_memoized <- NULL
api_get_session_cache <- NULL # per-session memoisation

api_get_disk_cache <- NULL # 'permanent' memoisation

# parse dataset response into list/dataframe
parse_dataset <- function(out) {
Expand Down
7 changes: 3 additions & 4 deletions man-roxygen/dots.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#' @param ... Additional arguments passed to an HTTP request function,
#' such as \code{\link[httr]{GET}}, \code{\link[httr]{POST}}, or
#' \code{\link[httr]{DELETE}}. By default, HTTP requests use
#' values cached from previous identical calls. Use
#' \code{use_cache=FALSE} (or `Sys.setenv(DATAVERSE_USE_CACHE =
#' FALSE)` if cached API calls are not desired.
#' \code{\link[httr]{DELETE}}. See \code{\link{use_cache}} for details
#' on how the *R* dataverse package uses disk and session caches to
#' improve network performance.
6 changes: 3 additions & 3 deletions man/URLs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/add_dataset_file.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/add_file.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/create_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/create_dataverse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/dataset_atom.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/dataset_versions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/dataverse_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/delete_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/delete_dataverse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/delete_file.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/delete_sword_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/files.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 11 additions & 6 deletions man/get_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit d8ae3ba

Please sign in to comment.