diff --git a/NAMESPACE b/NAMESPACE
index 4532f2e..063312a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,6 +3,7 @@
 export(geotargets_option_get)
 export(geotargets_option_set)
 export(tar_terra_rast)
+export(tar_terra_sprc)
 export(tar_terra_vect)
 importFrom(rlang,"%||%")
 importFrom(rlang,arg_match0)
diff --git a/R/tar-terra-sprc.R b/R/tar-terra-sprc.R
new file mode 100644
index 0000000..1d840a8
--- /dev/null
+++ b/R/tar-terra-sprc.R
@@ -0,0 +1,180 @@
+#' Create a terra _SpatRasterCollection_ target
+#'
+#' Provides a target format for [terra::SpatRasterCollection] objects,
+#' which have no restriction in the extent or other geometric parameters.
+#'
+#' @param filetype character. File format expressed as GDAL driver names passed
+#'   to [terra::writeRaster()]
+#' @param gdal character. GDAL driver specific datasource creation options
+#'   passed to [terra::writeRaster()]
+#' @param ... Additional arguments not yet used
+#'
+#' @inheritParams targets::tar_target
+#' @seealso [targets::tar_target_raw()]
+#' @author Andrew Gene Brown
+#' @author Nicholas Tierney
+#' @export
+#' @examples
+#' if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") {
+#'   targets::tar_dir({ # tar_dir() runs code from a temporary directory.
+#'     library(geotargets)
+#'     targets::tar_script({
+#'       elev_scale <- function(z = 1, projection = "EPSG:4326") {
+#'         terra::project(
+#'           terra::rast(system.file("ex", "elev.tif", package = "terra")) * z,
+#'           projection
+#'         )
+#'       }
+#'       list(
+#'         tar_terra_sprc(
+#'           raster_elevs,
+#'           # two rasters, one unaltered, one scaled by factor of 2 and
+#'           # reprojected to interrupted good homolosine
+#'           command = terra::sprc(list(
+#'             elev_scale(1),
+#'             elev_scale(2, "+proj=igh")
+#'           ))
+#'         )
+#'       )
+#'     })
+#'     targets::tar_make()
+#'     x <- targets::tar_read(raster_elevs)
+#'   })
+#' }
+tar_terra_sprc <- function(name,
+                           command,
+                           pattern = NULL,
+                           filetype = NULL,
+                           gdal = NULL,
+                           ...,
+                           tidy_eval = targets::tar_option_get("tidy_eval"),
+                           packages = targets::tar_option_get("packages"),
+                           library = targets::tar_option_get("library"),
+                           repository = targets::tar_option_get("repository"),
+                           iteration = targets::tar_option_get("iteration"),
+                           error = targets::tar_option_get("error"),
+                           memory = targets::tar_option_get("memory"),
+                           garbage_collection = targets::tar_option_get("garbage_collection"),
+                           deployment = targets::tar_option_get("deployment"),
+                           priority = targets::tar_option_get("priority"),
+                           resources = targets::tar_option_get("resources"),
+                           storage = targets::tar_option_get("storage"),
+                           retrieval = targets::tar_option_get("retrieval"),
+                           cue = targets::tar_option_get("cue")) {
+  check_pkg_installed("terra")
+
+  # Deparse the unevaluated `name` and capture `command` / `pattern` as
+  # expressions before handing them to targets::tar_target_raw().
+  name <- targets::tar_deparse_language(substitute(name))
+
+  envir <- targets::tar_option_get("envir")
+
+  command <- targets::tar_tidy_eval(
+    expr = as.expression(substitute(command)),
+    envir = envir,
+    tidy_eval = tidy_eval
+  )
+
+  pattern <- targets::tar_tidy_eval(
+    expr = as.expression(substitute(pattern)),
+    envir = envir,
+    tidy_eval = tidy_eval
+  )
+
+  drv <- get_gdal_available_driver_list("raster")
+
+  # if not specified by user, pull the corresponding geotargets option
+  filetype <- filetype %||% geotargets_option_get("gdal.raster.driver")
+  filetype <- rlang::arg_match0(filetype, drv$name)
+
+  gdal <- gdal %||% geotargets_option_get("gdal.raster.creation_options")
+
+  targets::tar_target_raw(
+    name = name,
+    command = command,
+    pattern = pattern,
+    packages = packages,
+    library = library,
+    format = create_format_terra_rasters_sprc(filetype = filetype,
+                                              gdal = gdal,
+                                              ...),
+    repository = repository,
+    iteration = iteration,
+    error = error,
+    memory = memory,
+    garbage_collection = garbage_collection,
+    deployment = deployment,
+    priority = priority,
+    resources = resources,
+    storage = storage,
+    retrieval = retrieval,
+    cue = cue
+  )
+}
+
+#' Build the targets storage format for SpatRasterCollection targets
+#'
+#' Resolves the GDAL driver and creation options, then returns the object
+#' created by `targets::tar_format()` whose writer saves every raster of the
+#' collection into a single file.
+#'
+#' @param filetype File format expressed as GDAL driver names passed to `terra::writeRaster()`
+#' @param gdal GDAL driver specific datasource creation options passed to `terra::writeRaster()`
+#' @param ... Additional arguments not yet used
+#' @return The value of `targets::tar_format()`.
+#' @noRd
+create_format_terra_rasters_sprc <- function(filetype, gdal, ...) {
+  check_pkg_installed("terra")
+
+  drv <- get_gdal_available_driver_list("raster")
+
+  filetype <- filetype %||% geotargets_option_get("gdal.raster.driver")
+  filetype <- rlang::arg_match0(filetype, drv$name)
+
+  gdal <- gdal %||% geotargets_option_get("gdal.raster.creation_options")
+
+  # substitute() inlines the resolved `filetype` and `gdal` values into the
+  # writer's body so it carries them as literals.
+  # NOTE(review): presumably needed because targets stores format functions
+  # without their enclosing environment -- confirm in ?targets::tar_format.
+  .write_terra_rasters_sprc <- eval(
+    substitute(
+      function(object, path) {
+        for (i in seq_len(length(object))) {
+          # The first raster creates (or overwrites) the file; each later
+          # raster is appended to it as an additional subdataset.
+          if (i > 1) {
+            opt <- "APPEND_SUBDATASET=YES"
+          } else {
+            opt <- ""
+          }
+          # Merge the append flag with the resolved creation options so that
+          # they reach GDAL. Empty entries are dropped, and "" is passed
+          # when nothing else remains.
+          opt <- c(opt, gdal)
+          opt <- opt[nzchar(opt)]
+          if (length(opt) == 0) {
+            opt <- ""
+          }
+          terra::writeRaster(
+            x = object[i],
+            filename = path,
+            filetype = filetype,
+            overwrite = (i == 1),
+            gdal = opt
+          )
+        }
+      },
+      list(filetype = filetype, gdal = gdal)
+    )
+  )
+
+  format_sprc_geotiff <- targets::tar_format(
+    read = function(path) terra::sprc(path),
+    write = .write_terra_rasters_sprc,
+    marshal = function(object) terra::wrap(object),
+    unmarshal = function(object) terra::unwrap(object)
+  )
+
+  format_sprc_geotiff
+}
diff --git a/man/tar_terra_sprc.Rd b/man/tar_terra_sprc.Rd
new file mode 100644
index 0000000..72114ae
--- /dev/null
+++ b/man/tar_terra_sprc.Rd
@@ -0,0 +1,259 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tar-terra-sprc.R
+\name{tar_terra_sprc}
+\alias{tar_terra_sprc}
+\title{Create a terra \emph{SpatRasterCollection} target}
+\usage{
+tar_terra_sprc(
+  name,
+  command,
+  pattern = NULL,
+  filetype = NULL,
+  gdal = NULL,
+  ...,
+  tidy_eval = targets::tar_option_get("tidy_eval"),
+  packages = targets::tar_option_get("packages"),
+  library = targets::tar_option_get("library"),
+  repository = targets::tar_option_get("repository"),
+  iteration = targets::tar_option_get("iteration"),
+  error = targets::tar_option_get("error"),
+  memory = targets::tar_option_get("memory"),
+  garbage_collection = targets::tar_option_get("garbage_collection"),
+  deployment = targets::tar_option_get("deployment"),
+  priority = targets::tar_option_get("priority"),
+  resources = targets::tar_option_get("resources"),
+  storage = targets::tar_option_get("storage"),
+  retrieval = targets::tar_option_get("retrieval"),
+  cue = targets::tar_option_get("cue")
+)
+}
+\arguments{
+\item{name}{Symbol, name of the target. A target
+name must be a valid name for a symbol in R, and it
+must not start with a dot.
Subsequent targets +can refer to this name symbolically to induce a dependency relationship: +e.g. \code{tar_target(downstream_target, f(upstream_target))} is a +target named \code{downstream_target} which depends on a target +\code{upstream_target} and a function \code{f()}. In addition, a target's +name determines its random number generator seed. In this way, +each target runs with a reproducible seed so someone else +running the same pipeline should get the same results, +and no two targets in the same pipeline share the same seed. +(Even dynamic branches have different names and thus different seeds.) +You can recover the seed of a completed target +with \code{tar_meta(your_target, seed)} and run \code{\link[targets:tar_seed_set]{tar_seed_set()}} +on the result to locally recreate the target's initial RNG state.} + +\item{command}{R code to run the target.} + +\item{pattern}{Language to define branching for a target. +For example, in a pipeline with numeric vector targets \code{x} and \code{y}, +\code{tar_target(z, x + y, pattern = map(x, y))} implicitly defines +branches of \code{z} that each compute \code{x[1] + y[1]}, \code{x[2] + y[2]}, +and so on. See the user manual for details.} + +\item{filetype}{character. File format expressed as GDAL driver names passed +to \code{\link[terra:writeRaster]{terra::writeRaster()}}} + +\item{gdal}{character. GDAL driver specific datasource creation options +passed to \code{\link[terra:writeRaster]{terra::writeRaster()}}} + +\item{...}{Additional arguments not yet used} + +\item{tidy_eval}{Logical, whether to enable tidy evaluation +when interpreting \code{command} and \code{pattern}. If \code{TRUE}, you can use the +"bang-bang" operator \verb{!!} to programmatically insert +the values of global objects.} + +\item{packages}{Character vector of packages to load right before +the target runs or the output data is reloaded for +downstream targets. 
Use \code{tar_option_set()} to set packages +globally for all subsequent targets you define.} + +\item{library}{Character vector of library paths to try +when loading \code{packages}.} + +\item{repository}{Character of length 1, remote repository for target +storage. Choices: +\itemize{ +\item \code{"local"}: file system of the local machine. +\item \code{"aws"}: Amazon Web Services (AWS) S3 bucket. Can be configured +with a non-AWS S3 bucket using the \code{endpoint} argument of +\code{\link[targets:tar_resources_aws]{tar_resources_aws()}}, but versioning capabilities may be lost +in doing so. +See the cloud storage section of +\url{https://books.ropensci.org/targets/data.html} +for details for instructions. +\item \code{"gcp"}: Google Cloud Platform storage bucket. +See the cloud storage section of +\url{https://books.ropensci.org/targets/data.html} +for details for instructions. +} + +Note: if \code{repository} is not \code{"local"} and \code{format} is \code{"file"} +then the target should create a single output file. +That output file is uploaded to the cloud and tracked for changes +where it exists in the cloud. The local file is deleted after +the target runs.} + +\item{iteration}{Character of length 1, name of the iteration mode +of the target. Choices: +\itemize{ +\item \code{"vector"}: branching happens with \code{vctrs::vec_slice()} and +aggregation happens with \code{vctrs::vec_c()}. +\item \code{"list"}, branching happens with \verb{[[]]} and aggregation happens with +\code{list()}. +\item \code{"group"}: \code{dplyr::group_by()}-like functionality to branch over +subsets of a non-dynamic data frame. +For \code{iteration = "group"}, the target must not by dynamic +(the \code{pattern} argument of \code{\link[targets:tar_target]{tar_target()}} must be left \code{NULL}). +The target's return value must be a data +frame with a special \code{tar_group} column of consecutive integers +from 1 through the number of groups. 
Each integer designates a group, +and a branch is created for each collection of rows in a group. +See the \code{\link[targets:tar_group]{tar_group()}} function to see how you can +create the special \code{tar_group} column with \code{dplyr::group_by()}. +}} + +\item{error}{Character of length 1, what to do if the target +stops and throws an error. Options: +\itemize{ +\item \code{"stop"}: the whole pipeline stops and throws an error. +\item \code{"continue"}: the whole pipeline keeps going. +\item \code{"abridge"}: any currently running targets keep running, +but no new targets launch after that. +(Visit \url{https://books.ropensci.org/targets/debugging.html} +to learn how to debug targets using saved workspaces.) +\item \code{"null"}: The errored target continues and returns \code{NULL}. +The data hash is deliberately wrong so the target is not +up to date for the next run of the pipeline. +}} + +\item{memory}{Character of length 1, memory strategy. +If \code{"persistent"}, the target stays in memory +until the end of the pipeline (unless \code{storage} is \code{"worker"}, +in which case \code{targets} unloads the value from memory +right after storing it in order to avoid sending +copious data over a network). +If \code{"transient"}, the target gets unloaded +after every new target completes. +Either way, the target gets automatically loaded into memory +whenever another target needs the value. +For cloud-based dynamic files +(e.g. \code{format = "file"} with \code{repository = "aws"}), +this memory strategy applies to the +temporary local copy of the file: +\code{"persistent"} means it remains until the end of the pipeline +and is then deleted, +and \code{"transient"} means it gets deleted as soon as possible. +The former conserves bandwidth, +and the latter conserves local storage.} + +\item{garbage_collection}{Logical, whether to run \code{base::gc()} +just before the target runs.} + +\item{deployment}{Character of length 1. 
If \code{deployment} is +\code{"main"}, then the target will run on the central controlling R process. +Otherwise, if \code{deployment} is \code{"worker"} and you set up the pipeline +with distributed/parallel computing, then +the target runs on a parallel worker. For more on distributed/parallel +computing in \code{targets}, please visit +\url{https://books.ropensci.org/targets/crew.html}.} + +\item{priority}{Numeric of length 1 between 0 and 1. Controls which +targets get deployed first when multiple competing targets are ready +simultaneously. Targets with priorities closer to 1 get dispatched earlier +(and polled earlier in \code{\link[targets:tar_make_future]{tar_make_future()}}).} + +\item{resources}{Object returned by \code{tar_resources()} +with optional settings for high-performance computing +functionality, alternative data storage formats, +and other optional capabilities of \code{targets}. +See \code{tar_resources()} for details.} + +\item{storage}{Character of length 1, only relevant to +\code{\link[targets:tar_make_clustermq]{tar_make_clustermq()}} and \code{\link[targets:tar_make_future]{tar_make_future()}}. +Must be one of the following values: +\itemize{ +\item \code{"main"}: the target's return value is sent back to the +host machine and saved/uploaded locally. +\item \code{"worker"}: the worker saves/uploads the value. +\item \code{"none"}: almost never recommended. It is only for +niche situations, e.g. the data needs to be loaded +explicitly from another language. If you do use it, +then the return value of the target is totally ignored +when the target ends, but +each downstream target still attempts to load the data file +(except when \code{retrieval = "none"}). + +If you select \code{storage = "none"}, then +the return value of the target's command is ignored, +and the data is not saved automatically. +As with dynamic files (\code{format = "file"}) it is the +responsibility of the user to write to +the data store from inside the target. 
+ +The distinguishing feature of \code{storage = "none"} +(as opposed to \code{format = "file"}) +is that in the general case, +downstream targets will automatically try to load the data +from the data store as a dependency. As a corollary, \code{storage = "none"} +is completely unnecessary if \code{format} is \code{"file"}. +}} + +\item{retrieval}{Character of length 1, only relevant to +\code{\link[targets:tar_make_clustermq]{tar_make_clustermq()}} and \code{\link[targets:tar_make_future]{tar_make_future()}}. +Must be one of the following values: +\itemize{ +\item \code{"main"}: the target's dependencies are loaded on the host machine +and sent to the worker before the target runs. +\item \code{"worker"}: the worker loads the targets dependencies. +\item \code{"none"}: the dependencies are not loaded at all. +This choice is almost never recommended. It is only for +niche situations, e.g. the data needs to be loaded +explicitly from another language. +}} + +\item{cue}{An optional object from \code{tar_cue()} to customize the +rules that decide whether the target is up to date.} +} +\description{ +Provides a target format for \link[terra:SpatRaster-class]{terra::SpatRasterCollection} objects, +which have no restriction in the extent or other geometric parameters. +} +\examples{ +if (Sys.getenv("TAR_LONG_EXAMPLES") == "true") { + targets::tar_dir({ # tar_dir() runs code from a temporary directory. 
+    library(geotargets)
+    targets::tar_script({
+      elev_scale <- function(z = 1, projection = "EPSG:4326") {
+        terra::project(
+          terra::rast(system.file("ex", "elev.tif", package = "terra")) * z,
+          projection
+        )
+      }
+      list(
+        tar_terra_sprc(
+          raster_elevs,
+          # two rasters, one unaltered, one scaled by factor of 2 and
+          # reprojected to interrupted good homolosine
+          command = terra::sprc(list(
+            elev_scale(1),
+            elev_scale(2, "+proj=igh")
+          ))
+        )
+      )
+    })
+    targets::tar_make()
+    x <- targets::tar_read(raster_elevs)
+  })
+}
+}
+\seealso{
+\code{\link[targets:tar_target_raw]{targets::tar_target_raw()}}
+}
+\author{
+Andrew Gene Brown
+
+Nicholas Tierney
+}
diff --git a/tests/testthat/test-tar-terra-rasts.R b/tests/testthat/test-tar-terra-rasts.R
new file mode 100644
index 0000000..6c9aa58
--- /dev/null
+++ b/tests/testthat/test-tar-terra-rasts.R
@@ -0,0 +1,38 @@
+targets::tar_test("tar_terra_sprc() works", {
+  # Use the option name that tar_terra_sprc() reads when `gdal` is not given.
+  geotargets::geotargets_option_set(
+    "gdal.raster.creation_options",
+    c("COMPRESS=DEFLATE", "TFW=YES")
+  )
+  targets::tar_script({
+    elev_scale <- function(z = 1, projection = "EPSG:4326") {
+      terra::project(
+        terra::rast(
+          system.file(
+            "ex",
+            "elev.tif",
+            package = "terra"
+          )
+        ) * z,
+        projection
+      )
+    }
+    list(
+      geotargets::tar_terra_sprc(
+        raster_elevs,
+        # two rasters, one unaltered, one scaled by factor of 2 and
+        # reprojected to interrupted good homolosine
+        command = terra::sprc(list(
+          elev_scale(1),
+          elev_scale(2, "+proj=igh")
+        ))
+      )
+    )
+  })
+  targets::tar_make()
+  x <- targets::tar_read(raster_elevs)
+  expect_s4_class(x, "SpatRasterCollection")
+  expect_snapshot(
+    x
+  )
+})