Skip to content

Commit

Permalink
Merge pull request #146 from R-ArcGIS/batch_size
Browse files Browse the repository at this point in the history
Add `page_size` argument to `arc_select()`
  • Loading branch information
JosiahParry authored Feb 14, 2024
2 parents d136c13 + 24e4fb7 commit 38111cf
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 38 deletions.
10 changes: 6 additions & 4 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
# arcgislayers 0.1.0 (unreleased)

- **Breaking**:
- `token` arguments are required to be a valid `httr2_token` object (strings are not supported).
- all `host` arguments are removed. Instead, the host is fetched from the `token`.
- all `user` arguments are removed. Instead, the username is fetched from the `token`. If it is not found, an error is thrown.
- Add `page_size` argument to `arc_select()`, allowing users to request fewer features per page and avoid timeouts for dense geometries
- Add support for `GroupLayer`s
- Add `arc_read()` with support for `name_repair` argument using `{vctrs}` (#108)
- Add `get_layer_estimates()` to retrieve estimate info such as the number of features and the extent of the layer
Expand All @@ -18,3 +15,8 @@
- adds cli as an explicit import (has been implicitly imported by httr2)
- repository made public
- add lifecycle badges to all exported functions <https://github.com/R-ArcGIS/arcgislayers/pull/101>

- **Breaking**:
- `token` arguments are required to be a valid `httr2_token` object (strings are not supported).
- all `host` arguments are removed. Instead, the host is fetched from the `token`.
- all `user` arguments are removed. Instead, the username is fetched from the `token`. If it is not found, an error is thrown.
115 changes: 81 additions & 34 deletions R/arc-select.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,27 @@
#' @inheritParams prepare_spatial_filter
#' @param n_max the maximum number of features to return. By default returns
#' every feature available. Unused at the moment.
#' @param page_size the maximum number of features to return per request. See Details.
#' @param ... additional query parameters passed to the API.
#' @inheritParams arc_open
#'
#' @details
#'
#' See [reference documentation](https://developers.arcgis.com/rest/services-reference/enterprise/query-feature-service-layer-.htm#GUID-BC2AD141-3386-49FB-AA09-FF341145F614) for possible arguments.
#'
#' `FeatureLayers` can contain very dense geometries with a lot of coordinates.
#' In those cases, the feature service may time out before all geometries can
#' be returned. To address this issue, we can reduce the number of features
#' returned per request by reducing the value of the `page_size` parameter.
#'
#' `arc_select()` works by sending a single request that counts the number of
#' features that will be returned by the current query. That number is then used
#' to calculate how many "pages" of responses are needed to fetch all the results.
#' The number of features returned (page size) is set to the `maxRecordCount`
#' property of the layer by default. However, by setting `page_size` to be
#' smaller than the `maxRecordCount`, we can return fewer geometries per page and
#' avoid timeouts.
#'
#' `r lifecycle::badge("experimental")`
#'
#' @export
Expand Down Expand Up @@ -57,6 +71,7 @@ arc_select <- function(
filter_geom = NULL,
predicate = "intersects",
n_max = Inf,
page_size = NULL,
token = arc_token(),
...
) {
Expand Down Expand Up @@ -122,7 +137,7 @@ arc_select <- function(
x <- update_params(x, !!!query)

# send the request
collect_layer(x, n_max = n_max, token = token, ...)
collect_layer(x, n_max = n_max, token = token, page_size = page_size, ...)
}

#' Query a FeatureLayer or Table object
Expand All @@ -131,11 +146,21 @@ arc_select <- function(
#' queries for FeatureLayer or Table objects.
#'
#' @noRd
collect_layer <- function(x,
n_max = Inf,
token = arc_token(),
...,
error_call = rlang::caller_env()) {
collect_layer <- function(
x,
n_max = Inf,
token = arc_token(),
page_size = NULL,
...,
error_call = rlang::caller_env()
) {

if (length(page_size) > 1) {
cli::cli_abort("{.arg page_size} must be length 1 not {length(page_size)}")
} else if (!is.null(page_size) && page_size < 1) {
cli::cli_abort("{.arg page_size} must be a positive integer.")
}

# 1. Make base request
# 2. Identify necessary query parameters
# 3. Figure out offsets and update query parameters
Expand Down Expand Up @@ -177,51 +202,68 @@ collect_layer <- function(x,
query_params <- validate_params(query)

# Offsets -----------------------------------------------------------------
# TODO make adjustable
feats_per_page <- x[["maxRecordCount"]]

# count the number of features in a query
n_feats <- count_results(req, query)
# get the maximum allowed to be returned
max_records <- x[["maxRecordCount"]]

if (is.null(n_feats)) {
cli::cli_abort(
c("Can't determine the number of features for {.arg x}.",
"*" = "Check to make sure your {.arg where} statement is valid."),
call = error_call
)
# if its null, just use max records (default)
if (is.null(page_size)) {
feats_per_page <- max_records
} else if (page_size > max_records) {
cli::cli_abort("{.arg page_size} ({page_size}) cannot excede layer's {.field maxRecordCount} property ({max_records})")
} else {
# ensure its an integer.
page_size <- as.integer(page_size)
feats_per_page <- page_size
}

# count the number of features in a query
n_feats <- count_results(req, query_params)

# identify the number of pages needed to return all features
# if n_max is provided need to reduce the number of pages
if (n_feats > n_max) {
n_feats <- n_max
# set `resultRecordCount` to `n_max`
query_params[["resultRecordCount"]] <- n_max
}

n_pages <- floor(n_feats / feats_per_page)

# identify the offsets needed to get all pages
# if n_pages is 0 we set offsets to 0 straight away
if (n_pages == 0) {
offsets <- 0
} else {
offsets <- c(0, (feats_per_page * 1:n_pages) + 1)
if (is.null(n_feats)) {
cli::cli_abort(
c("Can't determine the number of features for {.arg x}.",
"*" = "Check to make sure your {.arg where} statement is valid."),
call = error_call
)
}

# create a list of requests
all_requests <- lapply(offsets, add_offset, req, query_params)
# calculate the total number of requests to be made
n_pages <- ceiling(n_feats / feats_per_page)
# these values get passed to `resultOffset`
offsets <- (1:n_pages - 1) * feats_per_page
# create vector of page sizes to be passed to `resultRecordCount`
record_counts <- rep(feats_per_page, n_pages)
# modify the last offset to have `resultRecordCount` of the remainder
# this lets us get an exact value
record_counts[n_pages] <- n_feats - offsets[n_pages]

# create a list of requests from the offset and page sizes
all_requests <- mapply(
add_offset,
.offset = offsets,
.page_size = record_counts,
MoreArgs = list(.req = req, .params = query_params),
SIMPLIFY = FALSE
)

# make all requests and store responses in list
all_resps <- httr2::req_perform_parallel(all_requests, on_error = "continue")

# identify any errors
# TODO: determine how to handle errors
has_error <- vapply(all_resps, function(x) inherits(x, "error"), logical(1))
# has_error <- vapply(all_resps, function(x) inherits(x, "error"), logical(1))

# fetch the results
res <- lapply(
all_resps[!has_error],
all_resps,
# all_resps[!has_error],
function(x) {
parse_esri_json(
httr2::resp_body_string(x)
Expand All @@ -230,6 +272,7 @@ collect_layer <- function(x,
)

# combine
# TODO enhance this with suggested packages similar to arcpbf
res <- do.call(rbind, res)

if (is.null(res)) {
Expand Down Expand Up @@ -340,10 +383,14 @@ update_params <- function(x, ...) {
#'
#' @keywords internal
#' @noRd
add_offset <- function(offset, request, params) {
params[["resultOffset"]] <- offset
req <- httr2::req_url_path_append(request, "query")
httr2::req_body_form(req, !!!params)
add_offset <- function(.req, .offset, .page_size, .params) {
  # Build the request for a single page of results.
  #
  # .req:       request to extend; "query" is appended to its URL path.
  # .offset:    value sent as `resultOffset` for this page.
  # .page_size: value sent as `resultRecordCount` for this page.
  # .params:    named collection of the remaining query parameters, sent as
  #             the form body.
  #
  # Returns the modified request; nothing is performed here.
  #
  # The paging values are written into `.params` instead of being passed as
  # extra named arguments. `.params` can already carry a `resultRecordCount`
  # entry (the caller sets one when `n_max` caps the result), and splicing it
  # next to a second `resultRecordCount` argument would supply that field
  # twice. Assigning by name guarantees one entry per field, holding the
  # per-page value.
  .params[["resultOffset"]] <- .offset
  .params[["resultRecordCount"]] <- .page_size

  page_req <- httr2::req_url_path_append(.req, "query")
  httr2::req_body_form(page_req, !!!.params)
}

#' Validate query parameters
Expand Down
16 changes: 16 additions & 0 deletions man/arc_select.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions tests/testthat/test-arc_select.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,24 @@ test_that("arc_select() works on `ImageServer`s", {
tmp <- arc_select(landsat, n_max = 2, where = "Month = 2")
expect_snapshot(tmp)
})


test_that("arc_select(): respects `n_max`", {
  # layer 0 of the PLACES_LocalData_for_BetterHealth feature service
  layer_url <- "https://services3.arcgis.com/ZvidGQkLaDJxRSJ2/arcgis/rest/services/PLACES_LocalData_for_BetterHealth/FeatureServer/0"
  places <- arc_open(layer_url)

  # request at most 999 features, leaving the page size at its default
  selected <- arc_select(places, n_max = 999)

  # exactly `n_max` rows must come back
  expect_identical(nrow(selected), 999L)
})

test_that("arc_select(): respects `n_max` & `page_size`", {
  # layer 0 of the PLACES_LocalData_for_BetterHealth feature service
  layer_url <- "https://services3.arcgis.com/ZvidGQkLaDJxRSJ2/arcgis/rest/services/PLACES_LocalData_for_BetterHealth/FeatureServer/0"
  places <- arc_open(layer_url)

  # 999 features fetched 111 at a time; the row total must still equal `n_max`
  selected <- arc_select(places, n_max = 999, page_size = 111)

  expect_identical(nrow(selected), 999L)
})

0 comments on commit 38111cf

Please sign in to comment.