From 338febbbc627b2163e671655d71856fa92b62f92 Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Sat, 2 Mar 2024 09:48:47 -0800 Subject: [PATCH] Implement GDAL driver detection via {vapour} (#16) * Implement GDAL driver detection via {vapour} for #15 * Update docs --- DESCRIPTION | 5 +++-- R/gpkg-io.R | 55 +++++++++++++++++++++++++++++++++++++++++------ R/gpkg-table.R | 8 ++++--- man/gpkg_read.Rd | 5 ++++- man/gpkg_table.Rd | 10 ++++++--- man/gpkg_write.Rd | 2 ++ 6 files changed, 69 insertions(+), 16 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 27d1d6e..333db0f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: gpkg Type: Package Title: Utilities for the Open Geospatial Consortium 'GeoPackage' Format -Version: 0.0.8 +Version: 0.0.9 Authors@R: person(given="Andrew", family="Brown", email="brown.andrewg@gmail.com", role = c("aut", "cre")) Maintainer: Andrew Brown Description: Build Open Geospatial Consortium 'GeoPackage' files (). 'GDAL' utilities for reading and writing spatial data are provided by the 'terra' package. Additional 'GeoPackage' and 'SQLite' features for attributes and tabular data are implemented with the 'RSQLite' package. @@ -14,6 +14,7 @@ Imports: Suggests: RSQLite, terra (>= 1.6), + vapour, tinytest, dplyr, dbplyr, @@ -21,7 +22,7 @@ Suggests: rmarkdown License: CC0 Depends: R (>= 3.1.0) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Roxygen: list(markdown = TRUE) Encoding: UTF-8 LazyData: true diff --git a/R/gpkg-io.R b/R/gpkg-io.R index fe905dd..e3e2ef9 100644 --- a/R/gpkg-io.R +++ b/R/gpkg-io.R @@ -1,12 +1,13 @@ #' Read data from a GeoPackage #' -#' Experimental: This function is being evaluated for its scope compared to other more general functions that perform similar operations (i.e. `gpkg_tables()`). +#' This function creates a _geopackage_ object with references to all tables from the GeoPackage source specified in `x`. For a simple list of tables see `gpkg_tables()`. 
#' #' @param x Path to GeoPackage #' @param connect Connect to database and store connection in result? Default: `FALSE` #' @param quiet Hide printing of gdalinfo description to stdout. Default: `TRUE` #' @return A _geopackage_ object (list containing tables, grids and vector data) #' @export +#' @seealso [gpkg_tables()] #' @keywords io gpkg_read <- function(x, connect = FALSE, quiet = TRUE) { if (inherits(x, 'geopackage')) { @@ -72,6 +73,9 @@ gpkg_read <- function(x, connect = FALSE, quiet = TRUE) { #' @param gdal_options Additional `gdal_options`, passed to `terra::writeRaster()` #' @param ... Additional arguments are passed as GeoPackage "creation options." See Details. #' @details Additional, non-default GeoPackage creation options can be specified as arguments to this function. The full list of creation options can be viewed [here](https://gdal.org/drivers/raster/gpkg.html#creation-options) or in the `gpkg_creation_options` dataset. The name of the argument is `creation_option` and the value is selected from one of the elements of `values` for that option. +#' +#' If `x` contains source file paths, any comma-separated value (CSV) files are treated as attribute data--even if they contain a geometry column. GeoPackage source file paths are always treated as vector data sources, and only one layer will be read from the source and written to the target. If you need to read raster data from a GeoPackage first create a `SpatRaster` from the layer of interest (see `gpkg_rast()`) before passing to `gpkg_write()`. If you need to read multiple layers from any multi-layer source read them individually into suitable objects. For a source GeoPackage containing multiple layers you can use `gpkg_read()` (returns a _geopackage_ object) or `gpkg_tables()` (returns a _list_ object). +#' #' @return Logical. `TRUE` on successful write of at least one grid. 
#' @seealso [gpkg_creation_options] #' @export @@ -102,20 +106,57 @@ gpkg_write <- function(x, } .gpkg_process_sources <- function(x, ...) { + if (!is.list(x) || is.data.frame(x)) { x <- list(x) } - # TODO: extend this; only intended for prototyping before general sln - - # objects with a file-based + # objects with a file source src_raster <- vapply(x, inherits, logical(1), c('SpatRaster', 'SpatRasterCollection')) src_vector <- vapply(x, inherits, logical(1), 'SpatVectorProxy') obj_vector <- vapply(x, inherits, logical(1), c('sf', 'SpatVector')) obj_attrib <- vapply(x, inherits, logical(1), 'data.frame') - pth_raster <- vapply(x, .is.file, logical(1), "tif+|vrt|grd|png") - pth_vector <- vapply(x, .is.file, logical(1), "shp|gpkg") - pth_attrib <- vapply(x, .is.file, logical(1), "csv") + + # pth_raster <- vapply(x, .is.file, logical(1), "tif+|vrt|grd|png") + # pth_vector <- vapply(x, .is.file, logical(1), "shp|gpkg") + # pth_attrib <- vapply(x, .is.file, logical(1), "csv") + pth_file <- vapply(x, .is.file, logical(1), ".*") + + # TODO: gdal is not used to read attributes, + # provide support for some other tabular data formats? + # arrow? openxlsx? + pth_attrib <- pth_file & vapply(x, .is.file, logical(1), "csv") + pth_raster <- rep(FALSE, length(x)) + pth_vector <- rep(FALSE, length(x)) + + if (any(pth_file)) { + if (!requireNamespace("vapour")) { + stop("package 'vapour' is required to auto-detect GDAL drivers needed to read from arbitrary file paths", call. = FALSE) + } + + gdal_drv <- vapply(x, function(y) { + if (!is.character(y)) { + "" + } else + vapour::vapour_driver(y) + }, character(1)) + + drv <- vapour::vapour_all_drivers() + drm <- match(gdal_drv, drv$driver) + + + pth_raster <- pth_file & drv$raster[drm] + + # TODO: how to handle GPKG as a raster and vector source? 
+ pth_raster[gdal_drv == "GPKG"] <- FALSE + + pth_vector <- pth_file & drv$vector[drm] + + # TODO: handling of CSV files as attributes/without GDAL + # filter vapour drivers to subset that terra can readwrite + pth_vector[gdal_drv == "CSV"] <- FALSE + } + # classify list of object input grid, features, attributes # - each processing function handles local objects and/or file paths diff --git a/R/gpkg-table.R b/R/gpkg-table.R index 0bb3c43..acdf6e2 100644 --- a/R/gpkg-table.R +++ b/R/gpkg-table.R @@ -60,7 +60,7 @@ gpkg_table_pragma.geopackage <- function(x, table_name = NULL, ...) { #' @export #' @rdname gpkg_table #' @examplesIf !inherits(try(requireNamespace("RSQLite", quietly = TRUE)), 'try-error') &&!inherits(try(requireNamespace("dbplyr", quietly = TRUE)), 'try-error') && !inherits(try(requireNamespace("terra", quietly = TRUE)), 'try-error') -#' @description `gpkg_table()`: access a specific table (by name) and get a "lazy" `tibble` object referencing that table +#' @description `gpkg_table()`: Access a specific table (by name) and get a "lazy" {dbplyr} _tbl_SQLiteConnection_ object referencing that table #' @return `gpkg_table()`: A 'dbplyr' object of class _tbl_SQLiteConnection_ #' @examples #' @@ -135,7 +135,7 @@ gpkg_table.default <- function(x, dplyr::tbl(con, table_name, ...) } -#' @description `gpkg_collect()`: alias for `gpkg_table(..., collect=TRUE)` +#' @description `gpkg_collect()`: Alias for `gpkg_table(..., collect=TRUE)` #' @return `gpkg_collect()`: An object of class _data.frame_ #' @rdname gpkg_table #' @export @@ -143,7 +143,7 @@ gpkg_collect <- function(x, table_name, query_string = FALSE, ...) { gpkg_table(x, table_name, ..., query_string = query_string, collect = TRUE) } -#' @description `gpkg_tbl()`: shorthand for `gpkg_table(..., collect=FALSE)`(default) that always returns a 'dplyr' object. +#' @description `gpkg_tbl()`: Alias for `gpkg_table(..., collect=FALSE)`(default) that _always_ returns a _tbl_SQLiteConnection_ object. 
#' @return `gpkg_tbl()`: An object of class _tbl_SQLiteConnection_ #' @rdname gpkg_table #' @export @@ -151,6 +151,7 @@ gpkg_tbl <- function(x, table_name, ...) { gpkg_table(x, table_name, ..., collect = FALSE) } +#' @description `gpkg_rast()`: Get a _SpatRaster_ object corresponding to the specified `table_name` #' @return `gpkg_rast()`: A 'terra' object of class _SpatRaster_ #' @export #' @rdname gpkg_table @@ -167,6 +168,7 @@ gpkg_rast <- function(x, table_name = NULL, ...) { } +#' @description `gpkg_vect()`: Get a _SpatVector_ object corresponding to the specified `table_name` #' @return `gpkg_vect()`: A 'terra' object of class _SpatVector_ (may not contain geometry columns) #' @export #' @rdname gpkg_table diff --git a/man/gpkg_read.Rd b/man/gpkg_read.Rd index 782c3cc..ea5cb74 100644 --- a/man/gpkg_read.Rd +++ b/man/gpkg_read.Rd @@ -17,6 +17,9 @@ gpkg_read(x, connect = FALSE, quiet = TRUE) A \emph{geopackage} object (list containing tables, grids and vector data) } \description{ -Experimental: This function is being evaluated for its scope compared to other more general functions that perform similar operations (i.e. \code{gpkg_tables()}). +This function creates a \emph{geopackage} object with references to all tables from the GeoPackage source specified in \code{x}. For a simple list of tables see \code{gpkg_tables()}. +} +\seealso{ +\code{\link[=gpkg_tables]{gpkg_tables()}} } \keyword{io} diff --git a/man/gpkg_table.Rd b/man/gpkg_table.Rd index 3ba612d..ccefac8 100644 --- a/man/gpkg_table.Rd +++ b/man/gpkg_table.Rd @@ -58,11 +58,15 @@ gpkg_vect(x, table_name, ...) \description{ \code{gpkg_table_pragma()}: Get information on a table in a GeoPackage (without returning the whole table).
-\code{gpkg_table()}: access a specific table (by name) and get a "lazy" \code{tibble} object referencing that table +\code{gpkg_table()}: Access a specific table (by name) and get a "lazy" {dbplyr} \emph{tbl_SQLiteConnection} object referencing that table -\code{gpkg_collect()}: alias for \code{gpkg_table(..., collect=TRUE)} +\code{gpkg_collect()}: Alias for \code{gpkg_table(..., collect=TRUE)} -\code{gpkg_tbl()}: shorthand for \code{gpkg_table(..., collect=FALSE)}(default) that always returns a 'dplyr' object. +\code{gpkg_tbl()}: Alias for \code{gpkg_table(..., collect=FALSE)}(default) that \emph{always} returns a \emph{tbl_SQLiteConnection} object. + +\code{gpkg_rast()}: Get a \emph{SpatRaster} object corresponding to the specified \code{table_name} + +\code{gpkg_vect()}: Get a \emph{SpatVector} object corresponding to the specified \code{table_name} } \examples{ \dontshow{if (!inherits(try(requireNamespace("RSQLite", quietly = TRUE)), 'try-error') &&!inherits(try(requireNamespace("dbplyr", quietly = TRUE)), 'try-error') && !inherits(try(requireNamespace("terra", quietly = TRUE)), 'try-error')) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} diff --git a/man/gpkg_write.Rd b/man/gpkg_write.Rd index 727ca6a..4b5923b 100644 --- a/man/gpkg_write.Rd +++ b/man/gpkg_write.Rd @@ -43,6 +43,8 @@ Write data to a GeoPackage } \details{ Additional, non-default GeoPackage creation options can be specified as arguments to this function. The full list of creation options can be viewed \href{https://gdal.org/drivers/raster/gpkg.html#creation-options}{here} or in the \code{gpkg_creation_options} dataset. The name of the argument is \code{creation_option} and the value is selected from one of the elements of \code{values} for that option. + +If \code{x} contains source file paths, any comma-separated value (CSV) files are treated as attribute data--even if they contain a geometry column.
GeoPackage source file paths are always treated as vector data sources, and only one layer will be read from the source and written to the target. If you need to read raster data from a GeoPackage first create a \code{SpatRaster} from the layer of interest (see \code{gpkg_rast()}) before passing to \code{gpkg_write()}. If you need to read multiple layers from any multi-layer source read them individually into suitable objects. For a source GeoPackage containing multiple layers you can use \code{gpkg_read()} (returns a \emph{geopackage} object) or \code{gpkg_tables()} (returns a \emph{list} object). } \seealso{ \link{gpkg_creation_options}