Skip to content

Commit

Permalink
Merge pull request #517 from ropensci/overline-dt
Browse files Browse the repository at this point in the history
Switch parts of overline to data.table
  • Loading branch information
Robinlovelace authored Aug 17, 2023
2 parents 27657b8 + 519891e commit 6583e63
Show file tree
Hide file tree
Showing 5 changed files with 397 additions and 19 deletions.
37 changes: 19 additions & 18 deletions R/overline.R
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ overline2 <-
attrib,
ncores = 1,
simplify = TRUE,
regionalise = 1e9,
regionalise = 1e7,
quiet = ifelse(nrow(sl) < 1000, TRUE, FALSE),
fun = sum) {
if(as.character(unique(sf::st_geometry_type(sl))) == "MULTILINESTRING") {
Expand Down Expand Up @@ -232,12 +232,8 @@ overline2 <-
sl <- cbind(c3, sl)
rm(c3)

# browser()
# if(requireNamespace("data.table", quietly = TRUE)) {
# sl = data.table::data.table(sl)
# }
slg <- dplyr::group_by_at(sl, c("1", "2", "3", "4"))
sls <- dplyr::ungroup(dplyr::summarise_all(slg, .funs = fun))
sls <- dplyr::group_by_at(sl, c("1", "2", "3", "4"))
sls <- dplyr::ungroup(dplyr::summarise_all(sls, .funs = fun))
attrib <- names(sls)[5:ncol(sls)]
coords <- as.matrix(sls[, 1:4])
sl <- sls[, -c(1:4)]
Expand Down Expand Up @@ -267,6 +263,10 @@ overline2 <-
}
if (nrow(sl) > regionalise) {
message(paste0("large data detected, using regionalisation, nrow = ", nrow(sl)))

# Fix for https://github.com/ropensci/stplanr/issues/510
sl <- sl[st_is_valid(sl),]

suppressWarnings(cents <- sf::st_centroid(sl))
# Fix for https://github.com/r-spatial/sf/issues/1777
if(sf::st_is_longlat(cents)){
Expand All @@ -291,7 +291,7 @@ overline2 <-
cl <- parallel::makeCluster(ncores)
parallel::clusterExport(
cl = cl,
varlist = c("attrib"),
varlist = c("attrib","ol_grp"),
envir = environment()
)
parallel::clusterEvalQ(cl, {
Expand All @@ -300,13 +300,11 @@ overline2 <-
})
overlined_simple <- if (requireNamespace("pbapply", quietly = TRUE)) {
pbapply::pblapply(sl, function(y) {
y <- dplyr::group_by_at(y, attrib)
y <- dplyr::summarise(y, do_union = FALSE, .groups = "drop")
ol_grp(y, attrib)
}, cl = cl)
} else {
lapply(sl, function(y) {
y <- dplyr::group_by_at(y, attrib)
y <- dplyr::summarise(y, do_union = FALSE, .groups = "drop")
ol_grp(y, attrib)
})
}

Expand All @@ -315,13 +313,11 @@ overline2 <-
} else {
overlined_simple <- if (requireNamespace("pbapply", quietly = TRUE)) {
pbapply::pblapply(sl, function(y) {
y <- dplyr::group_by_at(y, attrib)
y <- dplyr::summarise(y, do_union = FALSE, .groups = "drop")
ol_grp(y, attrib)
})
} else {
lapply(sl, function(y) {
y <- dplyr::group_by_at(y, attrib)
y <- dplyr::summarise(y, do_union = FALSE, .groups = "drop")
ol_grp(y, attrib)
})
}
}
Expand All @@ -333,8 +329,8 @@ overline2 <-
if (!quiet) {
message(paste0(Sys.time(), " aggregating flows"))
}
overlined_simple <- dplyr::group_by_at(sl, attrib)
overlined_simple <- dplyr::summarise(overlined_simple, do_union = FALSE, .groups = "drop")

overlined_simple <- ol_grp(sl, attrib)
rm(sl)
}

Expand Down Expand Up @@ -363,6 +359,11 @@ overline2 <-
#' @export
overline.sf <- overline2

ol_grp <- function(sl, attrib){
sl <- data.table::data.table(sl)
sl <- sl[, .(geometry = sf::st_combine(geometry)), by = attrib]
sf::st_as_sf(sl)
}

#' Aggregate flows so they become non-directional (by geometry - the slow way)
#'
Expand Down
Empty file.
101 changes: 101 additions & 0 deletions data-raw/ad-hoc-tests/test-overline-performance.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
---
output: github_document
---

```{r}
devtools::load_all()
library(tidyverse)
```


```{r}
if(!file.exists("routes.geojson")) {
routes = pct::get_pct_routes_fast("isle-of-wight")
routes = routes %>%
slice(1:1000)
sf::write_sf(routes, "routes.geojson", delete_dsn = TRUE)
}
routes = geojsonsf::geojson_sf("routes.geojson")
nrow(routes)
```

```{r}
res1 = overline_old(routes, attrib = "foot")
```

```{r}
res2 = overline3(routes, attrib = "foot")
```

```{r}
summary(res2)
summary(res2)
```


# Small test

```{r}
res = bench::mark(time_unit = "s", check = FALSE,
original = {o1 <<- overline_old(routes, attrib = "foot")},
new = {o2 <<- overline3(routes, attrib = "foot")}
)
```


The results are as follows:

```{r}
res |>
dplyr::select(expression, median, mem_alloc) |>
mutate(routes_per_second = nrow(routes) / median) |>
knitr::kable()
```

# Large test

```{r}
routes = readRDS("/tmp/uptake_commute_fastest.Rds")
r = routes |>
slice(seq(10000))
names(r)
system.time({
rnet = overline_old(r,
attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"),
fun = list(sum = sum, max = first),
ncores = 1,
regionalise = 1e9)
})
system.time({
rnet = overline3(r,
attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"),
fun = list(sum = sum, max = first),
ncores = 1,
regionalise = 1e9)
})
bench::mark(check = FALSE, iterations = 1,
old = {res1 <<- overline_old(r,
attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"),
fun = list(sum = sum, max = first),
ncores = 1,
regionalise = 1e9)
},
new = {res2 <<- overline3(r,
attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"),
fun = list(sum = sum, max = first),
ncores = 1,
regionalise = 1e9)
}
)
```

```{r}
summary(res1)
summary(res2)
```


Loading

0 comments on commit 6583e63

Please sign in to comment.