Skip to content

Commit

Permalink
Merge pull request #137 from mtmorgan/issue-112-memoise-api-GET
Browse files Browse the repository at this point in the history
Issue 112/135 caching memoise api get
  • Loading branch information
kuriwaki authored Oct 16, 2024
2 parents 9af128a + 9a8ecc0 commit ab362f7
Show file tree
Hide file tree
Showing 54 changed files with 514 additions and 179 deletions.
12 changes: 10 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: dataverse
Version: 0.3.14
Version: 0.3.15
Title: Client for Dataverse 4+ Repositories
Authors@R:
c(person(given = "Shiro",
Expand Down Expand Up @@ -37,14 +37,22 @@ Authors@R:
family = "Gruber",
role = c("ctb"),
email = "JohannesB.Gruber@gmail.com",
comment = c(ORCID = "0000-0001-9177-1772")))
comment = c(ORCID = "0000-0001-9177-1772")),
person(given = "Martin",
family = "Morgan",
role = "ctb",
email = "mtmorgan.xyz@gmail.com",
comment = c(ORCID = "0000-0002-5874-8148")))
Imports:
checkmate,
httr,
memoise,
cachem,
jsonlite,
readr,
stats,
utils,
tools,
xml2
Suggests:
covr,
Expand Down
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ S3method(print,get_file)
S3method(print,sword_service_document)
export(add_dataset_file)
export(add_file)
export(cache_dataset)
export(cache_info)
export(cache_path)
export(cache_reset)
export(create_dataset)
export(create_dataverse)
export(dataset_atom)
Expand Down Expand Up @@ -63,3 +67,8 @@ export(service_document)
export(set_dataverse_metadata)
export(update_dataset)
export(update_dataset_file)
importFrom(cachem,cache_disk)
importFrom(checkmate,assert_string)
importFrom(checkmate,test_string)
importFrom(memoise,memoise)
importFrom(tools,R_user_dir)
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# dataverse

# CHANGES in dataverse 0.3.15

* Implement a cache for API calls (including file download) when dataset version is specified. The functions will reload from the cache automatically the second time. (#112, #135, by @mtmorgan)

# CHANGES in dataverse 0.3.14

* Improve recommendation for rdata loading (#107, #127)
Expand Down
10 changes: 4 additions & 6 deletions R/SWORD.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@
#' @export
service_document <- function(key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) {
u <- paste0(api_url(server, prefix = "dvn/api/"), "data-deposit/v1.1/swordv2/service-document")
r <- httr::GET(u, httr::authenticate(key, ""), ...)
httr::stop_for_status(r)
x <- xml2::as_list(xml2::read_xml(httr::content(r, "text")))
r <- api_get(u, httr::authenticate(key, ""), ...)
x <- xml2::as_list(xml2::read_xml(r))
w <- x$workspace
out <- list()
if ("title" %in% names(w)) {
Expand Down Expand Up @@ -74,11 +73,10 @@ list_datasets <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server =
dataverse <- get_dataverse(dataverse, key = key, server = server, ...)$alias
}
u <- paste0(api_url(server, prefix = "dvn/api/"), "data-deposit/v1.1/swordv2/collection/dataverse/", dataverse)
r <- httr::GET(u, httr::authenticate(key, ""), ...)
httr::stop_for_status(r)
r <- api_get(u, httr::authenticate(key, ""), ..., as = "raw")

# clean up response structure
x <- xml2::as_list(xml2::read_xml(r$content))
x <- xml2::as_list(xml2::read_xml(r))
feed <- x[["feed"]]
out <- list(title = feed[["title"]][[1L]],
generator = feed[["generator"]],
Expand Down
10 changes: 4 additions & 6 deletions R/SWORD_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,8 @@ dataset_atom <- function(dataset, key = Sys.getenv("DATAVERSE_KEY"), server = Sy
u <- paste0(api_url(server, prefix="dvn/api/"), "data-deposit/v1.1/swordv2/edit/study/", dataset)
}

r <- httr::GET(u, httr::authenticate(key, ""), ...)
httr::stop_for_status(r, task = httr::content(r)$message)
out <- parse_atom(rawToChar(r$content))
r <- api_get(u, httr::authenticate(key, ""), ..., as = "raw")
out <- parse_atom(rawToChar(r))
out
}

Expand All @@ -229,7 +228,6 @@ dataset_statement <- function(dataset, key = Sys.getenv("DATAVERSE_KEY"), server
dataset <- prepend_doi(dataset)
u <- paste0(api_url(server, prefix="dvn/api/"), "data-deposit/v1.1/swordv2/statement/study/", dataset)
}
r <- httr::GET(u, httr::authenticate(key, ""), ...)
httr::stop_for_status(r, task = httr::content(r)$message)
parse_dataset_statement(rawToChar(r$content))
r <- api_get(u, httr::authenticate(key, ""), ..., as = "raw")
parse_dataset_statement(rawToChar(r))
}
93 changes: 93 additions & 0 deletions R/cache.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#' @rdname cache
#' @aliases use_cache
#' @title Utilities for cache management
#' @description The dataverse package uses disk and session caches to improve network performance. Use of the cache is described on this page.
#' @details
#' Use of the cache is determined by the value of the `use_cache =` argument to dataset and other API calls, or by the environment variable `DATAVERSE_USE_CACHE`. Possible values are
#'
#' - `"none"`: do not use the cache. This is the default for datasets that are versioned with `":draft"`, `":latest"`, and `":latest-published"`.
#' - `"session"`: cache API requests for the duration of the *R* session. This is the default for API calls that do not involve file or dataset retrieval.
#' - `"disk": use a permanent disk cache. This is the default for files and explicitly versioned datasets.
#'
#' @template version
#' @details
#' `cache_dataset()` determines whether a dataset or file should be cached based on the version specification.
#' @return
#' `cache_dataset()` returns `"disk"` if the dataset version is to be cached to disk, `"none"` otherwise.
#' @importFrom checkmate assert_string
#' @examples
#' cache_dataset(":latest") # "none"
#' cache_dataset("1.2") # "disk"
#' @export
cache_dataset <- function(version) {
assert_string(version)
if (version %in% c(":draft", ":latest", ":latest-published")) {
"none"
} else {
"disk"
}
}

#' @rdname cache
#' @details
#' `cache_path()` finds or creates the location (directory) on the file system containing the cache.
#'
#' @return
#' `cache_path()` returns the file path to the directory containing the cache.
#'
#' @examples
#' cache_path()
#'
#' @importFrom tools R_user_dir
#' @export
cache_path <- function() {
cache_path <- file.path(R_user_dir("dataverse", "cache"), "api_cache")
if (!dir.exists(cache_path)) {
status <- dir.create(cache_path, recursive = TRUE)
if (!status)
warning("'dataverse' failed to create a 'disk' cache")
}

cache_path
}

#' @rdname cache
#' @details
#' `cache_info()` queries the cache for information about the name, size, and other attributes of files in the cache. The file name is a 'hash' of the function used to retrieve the file; it is not useful for identifying specific files.
#' @return
#' `cache_info()` returns a data.frame containing names and sizes of files in the cache.
#' @examples
#' cache_info()
#' @export
cache_info <- function() {
cache_path <- cache_path()
if (dir.exists(cache_path)) {
files <- dir(cache_path(), full.names = TRUE)
info <- file.info(files)
rownames(info) <- basename(files)
info
}
}

#' @rdname cache
#' @details
#' `cache_reset()` clears all downloaded files from the disk cache.
#' @returns
#' `cache_reset()` returns the path to the (now empty) cache, invisibly)
#' @export
cache_reset <- function() {
cache_path <- cache_path()
if (dir.exists(cache_path))
cache_disk(cache_path)$reset()
invisible(cache_path)
}

## utility to check valid values of `use_cache =`
#' @importFrom checkmate test_string
assert_use_cache <- function(use_cache) {
test <-
test_string(use_cache) &&
use_cache %in% c("disk", "session", "none")
if (!test)
stop("argument 'use_cache' is not correct, see ?use_cache")
}
5 changes: 2 additions & 3 deletions R/dataset_versions.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,8 @@
dataset_versions <- function(dataset, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) {
dataset <- dataset_id(dataset, key = key, server = server, ...)
u <- paste0(api_url(server), "datasets/", dataset, "/versions")
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
httr::stop_for_status(r, task = httr::content(r)$message)
out <- httr::content(r, encoding = "UTF-8")$data
r <- api_get(u, ..., key = key, as = NULL)
out <- r$data
lapply(out, function(x) {
x <- `class<-`(x, "dataverse_dataset_version")
x$files <- lapply(x$files, `class<-`, "dataverse_file")
Expand Down
5 changes: 2 additions & 3 deletions R/dataverse_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@
dataverse_metadata <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) {
dataverse <- dataverse_id(dataverse, key = key, server = server, ...)
u <- paste0(api_url(server), "dataverses/", dataverse, "/metadatablocks")
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
httr::stop_for_status(r, task = httr::content(r)$message)
jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"), simplifyDataFrame = FALSE)$data
r <- api_get(u, ..., key = key)
jsonlite::fromJSON(r, simplifyDataFrame = FALSE)$data
}

#' @title Set Dataverse metadata
Expand Down
5 changes: 2 additions & 3 deletions R/dataverse_search.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,8 @@ function(...,
u <- paste0(api_url(server), "search")

# execute request
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), query = query)
httr::stop_for_status(r, task = httr::content(r)$message)
out <- jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"))
r <- api_get(u, query = query, key = key)
out <- jsonlite::fromJSON(r)
if (isTRUE(verbose)) {
n_total <- ngettext(out$data$total_count, "result", "results")
message(sprintf(paste0("%s of %s ", n_total, " retrieved"), out$data$count_in_response, out$data$total_count))
Expand Down
32 changes: 16 additions & 16 deletions R/get_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#' @template version
#' @template envvars
#' @template dots
#' @param use_cache one of `"disk"`, `"session"`, or `"none"`, describing how datasets are cached to reduce network traffic. See \code{\link{cache_dataset}} for details.
#' @return A list of class \dQuote{dataverse_dataset} or a list of a form dependent
#' on the specific metadata block retrieved. \code{dataset_files} returns a list of
#' objects of class \dQuote{dataverse_file}.
Expand All @@ -45,17 +46,17 @@ get_dataset <- function(
version = ":latest",
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...
...,
use_cache = Sys.getenv("DATAVERSE_USE_CACHE", cache_dataset(version))
) {
dataset <- dataset_id(dataset, key = key, server = server, ...)
dataset <- dataset_id(dataset, key = key, server = server, ..., use_cache = use_cache)
if (!is.null(version)) {
u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version)
} else {
u <- paste0(api_url(server), "datasets/", dataset)
}
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
httr::stop_for_status(r, task = httr::content(r)$message)
parse_dataset(httr::content(r, as = "text", encoding = "UTF-8"))
r <- api_get(u, ..., key = key, use_cache = use_cache)
parse_dataset(r)
}

#' @rdname get_dataset
Expand All @@ -70,19 +71,18 @@ dataset_metadata <- function(
block = "citation",
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...
...,
use_cache = Sys.getenv("DATAVERSE_USE_CACHE", cache_dataset(version))
) {
dataset <- dataset_id(dataset, key = key, server = server, ...)
dataset <- dataset_id(dataset, key = key, server = server, ..., use_cache = use_cache)
if (!is.null(block)) {
u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version, "/metadata/", block)
} else {
u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version, "/metadata")
}

r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
httr::stop_for_status(r, task = httr::content(r)$message)
out <- httr::content(r, as = "text", encoding = "UTF-8")
jsonlite::fromJSON(out)[["data"]]
r <- api_get(u, ..., key = key, use_cache = use_cache)
jsonlite::fromJSON(r)[["data"]]
}

#' @rdname get_dataset
Expand All @@ -92,12 +92,12 @@ dataset_files <- function(
version = ":latest",
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...
...,
use_cache = Sys.getenv("DATAVERSE_USE_CACHE", cache_dataset(version))
) {
dataset <- dataset_id(dataset, key = key, server = server, ...)
dataset <- dataset_id(dataset, key = key, server = server, ..., use_cache = use_cache)
u <- paste0(api_url(server), "datasets/", dataset, "/versions/", version, "/files")
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
httr::stop_for_status(r, task = httr::content(r)$message)
out <- jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"), simplifyDataFrame = FALSE)$data
r <- api_get(u, ..., key = key, use_cache = use_cache)
out <- jsonlite::fromJSON(r, simplifyDataFrame = FALSE)$data
structure(lapply(out, `class<-`, "dataverse_file"))
}
10 changes: 4 additions & 6 deletions R/get_dataverse.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,8 @@ get_dataverse <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server =
dataverse <- dataverse_id(dataverse, key = key, server = server, ...)
}
u <- paste0(api_url(server), "dataverses/", dataverse)
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
httr::stop_for_status(r, task = httr::content(r)$message)
out <- jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"))
r <- api_get(u, ..., key = key)
out <- jsonlite::fromJSON(r)
structure(out$data, class = "dataverse")
}

Expand All @@ -42,9 +41,8 @@ get_dataverse <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server =
dataverse_contents <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) {
dataverse <- dataverse_id(dataverse, key = key, server = server, ...)
u <- paste0(api_url(server), "dataverses/", dataverse, "/contents")
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
httr::stop_for_status(r, task = httr::content(r)$message)
out <- jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"), simplifyDataFrame = FALSE)
r <- api_get(u, ..., key = key)
out <- jsonlite::fromJSON(r, simplifyDataFrame = FALSE)
structure(lapply(out$data, function(x) {
`class<-`(x, if (x$type == "dataset") "dataverse_dataset" else "dataverse")
}))
Expand Down
5 changes: 2 additions & 3 deletions R/get_facets.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
get_facets <- function(dataverse, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) {
dataverse <- dataverse_id(dataverse, key = key, server = server, ...)
u <- paste0(api_url(server), "dataverses/", dataverse, "/facets")
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
httr::stop_for_status(r, task = httr::content(r)$message)
jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"))$data
r <- api_get(u, ..., key = key)
jsonlite::fromJSON(r)$data
}
2 changes: 2 additions & 0 deletions R/get_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#' @template envvars
#' @template dots
#' @template ds
#' @template version
#'
#' @return \code{get_file} returns a raw vector (or list of raw vectors,
#' if \code{length(file) > 1}), which can be saved locally with the `writeBin`
Expand Down Expand Up @@ -89,6 +90,7 @@ get_file <- function(
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
original = TRUE,
version = ":latest",
...
) {

Expand Down
14 changes: 4 additions & 10 deletions R/get_file_by_id.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,10 @@ get_file_by_id <- function(
if (return_url) {
return(httr::modify_url(u, query = query))
}
if (isFALSE(progress))
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), query = query, ...)

if (isTRUE(progress))
r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), query = query, httr::progress(type = "down"), ...)



httr::stop_for_status(r, task = httr::content(r)$message)
httr::content(r, as = "raw")
# add a progress bar; 'NULL' if progress is not TRUE. 'NULL' arguments
# are not seen by httr::GET()
progress_bar <- if (isTRUE(progress)) httr::progress(type = "down")
api_get(u, query = query, progress_bar, ..., key = key, as = "raw")
}

#' @rdname files
Expand Down
5 changes: 1 addition & 4 deletions R/get_file_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,5 @@ get_file_metadata <-
u <- paste0(api_url(server), "access/datafile/", file, "/metadata/", format)
}

r <- httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...)
httr::stop_for_status(r, task = httr::content(r)$message)
out <- httr::content(r, as = "text", encoding = "UTF-8")
return(out)
api_get(u, ..., key = key)
}
Loading

0 comments on commit ab362f7

Please sign in to comment.