diff --git a/R/overline.R b/R/overline.R
index 8224c4ad..91a8ad8d 100644
--- a/R/overline.R
+++ b/R/overline.R
@@ -166,7 +166,7 @@ overline2 <-
attrib,
ncores = 1,
simplify = TRUE,
- regionalise = 1e9,
+ regionalise = 1e7,
quiet = ifelse(nrow(sl) < 1000, TRUE, FALSE),
fun = sum) {
if(as.character(unique(sf::st_geometry_type(sl))) == "MULTILINESTRING") {
@@ -232,12 +232,8 @@ overline2 <-
sl <- cbind(c3, sl)
rm(c3)
- # browser()
- # if(requireNamespace("data.table", quietly = TRUE)) {
- # sl = data.table::data.table(sl)
- # }
- slg <- dplyr::group_by_at(sl, c("1", "2", "3", "4"))
- sls <- dplyr::ungroup(dplyr::summarise_all(slg, .funs = fun))
+ sls <- dplyr::group_by_at(sl, c("1", "2", "3", "4"))
+ sls <- dplyr::ungroup(dplyr::summarise_all(sls, .funs = fun))
attrib <- names(sls)[5:ncol(sls)]
coords <- as.matrix(sls[, 1:4])
sl <- sls[, -c(1:4)]
@@ -267,6 +263,10 @@ overline2 <-
}
if (nrow(sl) > regionalise) {
message(paste0("large data detected, using regionalisation, nrow = ", nrow(sl)))
+
+ # Fix for https://github.com/ropensci/stplanr/issues/510
+ sl <- sl[st_is_valid(sl),]
+
suppressWarnings(cents <- sf::st_centroid(sl))
# Fix for https://github.com/r-spatial/sf/issues/1777
if(sf::st_is_longlat(cents)){
@@ -291,7 +291,7 @@ overline2 <-
cl <- parallel::makeCluster(ncores)
parallel::clusterExport(
cl = cl,
- varlist = c("attrib"),
+ varlist = c("attrib","ol_grp"),
envir = environment()
)
parallel::clusterEvalQ(cl, {
@@ -300,13 +300,11 @@ overline2 <-
})
overlined_simple <- if (requireNamespace("pbapply", quietly = TRUE)) {
pbapply::pblapply(sl, function(y) {
- y <- dplyr::group_by_at(y, attrib)
- y <- dplyr::summarise(y, do_union = FALSE, .groups = "drop")
+ ol_grp(y, attrib)
}, cl = cl)
} else {
lapply(sl, function(y) {
- y <- dplyr::group_by_at(y, attrib)
- y <- dplyr::summarise(y, do_union = FALSE, .groups = "drop")
+ ol_grp(y, attrib)
})
}
@@ -315,13 +313,11 @@ overline2 <-
} else {
overlined_simple <- if (requireNamespace("pbapply", quietly = TRUE)) {
pbapply::pblapply(sl, function(y) {
- y <- dplyr::group_by_at(y, attrib)
- y <- dplyr::summarise(y, do_union = FALSE, .groups = "drop")
+ ol_grp(y, attrib)
})
} else {
lapply(sl, function(y) {
- y <- dplyr::group_by_at(y, attrib)
- y <- dplyr::summarise(y, do_union = FALSE, .groups = "drop")
+ ol_grp(y, attrib)
})
}
}
@@ -333,8 +329,8 @@ overline2 <-
if (!quiet) {
message(paste0(Sys.time(), " aggregating flows"))
}
- overlined_simple <- dplyr::group_by_at(sl, attrib)
- overlined_simple <- dplyr::summarise(overlined_simple, do_union = FALSE, .groups = "drop")
+
+ overlined_simple <- ol_grp(sl, attrib)
rm(sl)
}
@@ -363,6 +359,11 @@ overline2 <-
#' @export
overline.sf <- overline2
+ol_grp <- function(sl, attrib){
+ sl <- data.table::data.table(sl)
+ sl <- sl[, .(geometry = sf::st_combine(geometry)), by = attrib]
+ sf::st_as_sf(sl)
+}
#' Aggregate flows so they become non-directional (by geometry - the slow way)
#'
diff --git a/data-raw/ad-hoc-tests/test-overline-performance.R b/data-raw/ad-hoc-tests/test-overline-performance.R
new file mode 100644
index 00000000..e69de29b
diff --git a/data-raw/ad-hoc-tests/test-overline-performance.Rmd b/data-raw/ad-hoc-tests/test-overline-performance.Rmd
new file mode 100644
index 00000000..2de6ac0e
--- /dev/null
+++ b/data-raw/ad-hoc-tests/test-overline-performance.Rmd
@@ -0,0 +1,101 @@
+---
+output: github_document
+---
+
+```{r}
+devtools::load_all()
+library(tidyverse)
+```
+
+
+```{r}
+if(!file.exists("routes.geojson")) {
+ routes = pct::get_pct_routes_fast("isle-of-wight")
+ routes = routes %>%
+ slice(1:1000)
+ sf::write_sf(routes, "routes.geojson", delete_dsn = TRUE)
+}
+
+routes = geojsonsf::geojson_sf("routes.geojson")
+nrow(routes)
+
+```
+
+```{r}
+res1 = overline_old(routes, attrib = "foot")
+```
+
+```{r}
+res2 = overline3(routes, attrib = "foot")
+```
+
+```{r}
+summary(res2)
+summary(res2)
+```
+
+
+# Small test
+
+```{r}
+res = bench::mark(time_unit = "s", check = FALSE,
+ original = {o1 <<- overline_old(routes, attrib = "foot")},
+ new = {o2 <<- overline3(routes, attrib = "foot")}
+)
+```
+
+
+The results are as follows:
+
+```{r}
+res |>
+ dplyr::select(expression, median, mem_alloc) |>
+ mutate(routes_per_second = nrow(routes) / median) |>
+ knitr::kable()
+```
+
+# Large test
+
+```{r}
+routes = readRDS("/tmp/uptake_commute_fastest.Rds")
+r = routes |>
+ slice(seq(10000))
+names(r)
+system.time({
+rnet = overline_old(r,
+ attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"),
+ fun = list(sum = sum, max = first),
+ ncores = 1,
+ regionalise = 1e9)
+})
+
+system.time({
+rnet = overline3(r,
+ attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"),
+ fun = list(sum = sum, max = first),
+ ncores = 1,
+ regionalise = 1e9)
+})
+bench::mark(check = FALSE, iterations = 1,
+ old = {res1 <<- overline_old(r,
+ attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"),
+ fun = list(sum = sum, max = first),
+ ncores = 1,
+ regionalise = 1e9)
+ },
+new = {res2 <<- overline3(r,
+ attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"),
+ fun = list(sum = sum, max = first),
+ ncores = 1,
+ regionalise = 1e9)
+}
+ )
+```
+
+```{r}
+summary(res1)
+summary(res2)
+
+```
+
+
diff --git a/data-raw/ad-hoc-tests/test-overline-performance.md b/data-raw/ad-hoc-tests/test-overline-performance.md
new file mode 100644
index 00000000..07ae3d50
--- /dev/null
+++ b/data-raw/ad-hoc-tests/test-overline-performance.md
@@ -0,0 +1,276 @@
+
+``` r
+devtools::load_all()
+```
+
+ ## ℹ Loading stplanr
+
+ ## Warning: Objects listed as exports, but not present in namespace:
+ ## • overline2
+
+``` r
+library(tidyverse)
+```
+
+ ## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
+ ## ✔ dplyr 1.1.2 ✔ readr 2.1.4
+ ## ✔ forcats 1.0.0 ✔ stringr 1.5.0
+ ## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
+ ## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
+ ## ✔ purrr 1.0.2
+ ## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+ ## ✖ readr::edition_get() masks testthat::edition_get()
+ ## ✖ dplyr::filter() masks stats::filter()
+ ## ✖ purrr::is_null() masks testthat::is_null()
+ ## ✖ dplyr::lag() masks stats::lag()
+ ## ✖ readr::local_edition() masks testthat::local_edition()
+ ## ✖ dplyr::matches() masks tidyr::matches(), testthat::matches()
+ ## ✖ readr::parse_date() masks stplanr::parse_date()
+ ## ℹ Use the conflicted package () to force all conflicts to become errors
+
+``` r
+if(!file.exists("routes.geojson")) {
+ routes = pct::get_pct_routes_fast("isle-of-wight")
+ routes = routes %>%
+ slice(1:1000)
+ sf::write_sf(routes, "routes.geojson", delete_dsn = TRUE)
+}
+
+routes = geojsonsf::geojson_sf("routes.geojson")
+nrow(routes)
+```
+
+ ## [1] 1000
+
+``` r
+res1 = overline_old(routes, attrib = "foot")
+```
+
+ ## 2023-08-17 09:40:59.482411 constructing segments
+
+ ## 2023-08-17 09:41:00.722783 building geometry
+
+ ## 2023-08-17 09:41:01.004696 simplifying geometry
+
+ ## 2023-08-17 09:41:01.005065 aggregating flows
+
+ ## 2023-08-17 09:41:01.237217 rejoining segments into linestrings
+
+``` r
+res2 = overline3(routes, attrib = "foot")
+```
+
+ ## 2023-08-17 09:41:01.377522 constructing segments
+
+ ## 2023-08-17 09:41:02.752467 building geometry
+
+ ## 2023-08-17 09:41:03.045405 simplifying geometry
+
+ ## 2023-08-17 09:41:03.045752 aggregating flows
+
+ ## 2023-08-17 09:41:03.28658 rejoining segments into linestrings
+
+# Small test
+
+``` r
+res = bench::mark(time_unit = "s", check = FALSE,
+ original = {overline_old(routes, attrib = "foot")},
+ new = {overline3(routes, attrib = "foot")}
+)
+```
+
+ ## 2023-08-17 09:41:03.547956 constructing segments
+
+ ## 2023-08-17 09:41:04.903392 building geometry
+
+ ## 2023-08-17 09:41:05.311159 simplifying geometry
+
+ ## 2023-08-17 09:41:05.311571 aggregating flows
+
+ ## 2023-08-17 09:41:05.578866 rejoining segments into linestrings
+
+ ## 2023-08-17 09:41:10.37143 constructing segments
+
+ ## 2023-08-17 09:41:11.527714 building geometry
+
+ ## 2023-08-17 09:41:11.844704 simplifying geometry
+
+ ## 2023-08-17 09:41:11.845035 aggregating flows
+
+ ## 2023-08-17 09:41:12.235894 rejoining segments into linestrings
+
+ ## 2023-08-17 09:41:15.413275 constructing segments
+
+ ## 2023-08-17 09:41:16.371987 building geometry
+
+ ## 2023-08-17 09:41:16.663718 simplifying geometry
+
+ ## 2023-08-17 09:41:16.66405 aggregating flows
+
+ ## 2023-08-17 09:41:16.876137 rejoining segments into linestrings
+
+ ## 2023-08-17 09:41:17.162153 constructing segments
+
+ ## 2023-08-17 09:41:18.370308 building geometry
+
+ ## 2023-08-17 09:41:18.723326 simplifying geometry
+
+ ## 2023-08-17 09:41:18.723716 aggregating flows
+
+ ## 2023-08-17 09:41:19.020335 rejoining segments into linestrings
+
+ ## Warning: Some expressions had a GC in every iteration; so filtering is
+ ## disabled.
+
+The results are as follows:
+
+``` r
+res |>
+ dplyr::select(expression, median, mem_alloc) |>
+ mutate(routes_per_second = nrow(routes) / median) |>
+ knitr::kable()
+```
+
+| expression | median | mem_alloc | routes_per_second |
+|:-----------|---------:|----------:|------------------:|
+| original | 1.561284 | 213MB | 640.4983 |
+| new | 2.189923 | 192MB | 456.6370 |
+
+# Large test
+
+``` r
+routes = readRDS("/tmp/uptake_commute_fastest.Rds")
+r = routes |>
+ slice(seq(10000))
+names(r)
+```
+
+ ## [1] "route_number" "name"
+ ## [3] "provisionName" "distances"
+ ## [5] "time" "quietness"
+ ## [7] "gradient_smooth" "geo_code1"
+ ## [9] "geo_code2" "car"
+ ## [11] "taxi" "foot"
+ ## [13] "bicycle" "public_transport"
+ ## [15] "all" "dist_euclidean"
+ ## [17] "dist_euclidean_jittered" "route_id"
+ ## [19] "splittingID" "geometry"
+ ## [21] "route_hilliness" "length_route"
+ ## [23] "pcycle_go_dutch" "pcycle_ebike"
+ ## [25] "bicycle_go_dutch" "bicycle_ebike"
+ ## [27] "mode_ratio_go_dutch" "mode_ratio_ebike"
+ ## [29] "car_go_dutch" "public_transport_go_dutch"
+ ## [31] "foot_go_dutch" "car_ebike"
+ ## [33] "public_transport_ebike" "foot_ebike"
+
+``` r
+system.time({
+rnet = overline_old(r,
+ attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"),
+ fun = list(sum = sum, max = first),
+ ncores = 1,
+ regionalise = 1e9)
+})
+```
+
+ ## 2023-08-17 09:42:23.777888 constructing segments
+
+ ## 2023-08-17 09:42:31.65472 building geometry
+
+ ## 2023-08-17 09:42:32.809003 simplifying geometry
+
+ ## 2023-08-17 09:42:32.809407 aggregating flows
+
+ ## 2023-08-17 09:42:36.679727 rejoining segments into linestrings
+
+ ## user system elapsed
+ ## 12.193 1.239 13.469
+
+``` r
+system.time({
+rnet = overline3(r,
+ attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"),
+ fun = list(sum = sum, max = first),
+ ncores = 1,
+ regionalise = 1e9)
+})
+```
+
+ ## 2023-08-17 09:42:39.289945 constructing segments
+
+ ## 2023-08-17 09:42:46.551332 building geometry
+
+ ## 2023-08-17 09:42:47.787907 simplifying geometry
+
+ ## 2023-08-17 09:42:47.788377 aggregating flows
+
+ ## 2023-08-17 09:42:49.824217 rejoining segments into linestrings
+
+ ## user system elapsed
+ ## 11.511 0.044 11.107
+
+``` r
+bench::mark(check = FALSE, iterations = 1,
+ old = {res1 <<- overline_old(r,
+ attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"),
+ fun = list(sum = sum, max = first),
+ ncores = 1,
+ regionalise = 1e9)
+ },
+new = {res2 <<- overline3(r,
+ attrib = c("bicycle","bicycle_go_dutch","bicycle_ebike","quietness","gradient_smooth"),
+ fun = list(sum = sum, max = first),
+ ncores = 1,
+ regionalise = 1e9)
+}
+ )
+```
+
+ ## 2023-08-17 09:42:50.545904 constructing segments
+
+ ## 2023-08-17 09:42:57.975437 building geometry
+
+ ## 2023-08-17 09:42:58.99299 simplifying geometry
+
+ ## 2023-08-17 09:42:58.993323 aggregating flows
+
+ ## 2023-08-17 09:43:02.448424 rejoining segments into linestrings
+
+ ## 2023-08-17 09:43:29.905679 constructing segments
+
+ ## 2023-08-17 09:43:36.984241 building geometry
+
+ ## 2023-08-17 09:43:38.07569 simplifying geometry
+
+ ## 2023-08-17 09:43:38.076099 aggregating flows
+
+ ## 2023-08-17 09:43:40.249743 rejoining segments into linestrings
+
+ ## 2023-08-17 09:43:49.839485 constructing segments
+
+ ## 2023-08-17 09:43:56.549587 building geometry
+
+ ## 2023-08-17 09:43:57.601956 simplifying geometry
+
+ ## 2023-08-17 09:43:57.602298 aggregating flows
+
+ ## 2023-08-17 09:44:02.195464 rejoining segments into linestrings
+
+ ## 2023-08-17 09:44:02.867219 constructing segments
+
+ ## 2023-08-17 09:44:11.384976 building geometry
+
+ ## 2023-08-17 09:44:12.857531 simplifying geometry
+
+ ## 2023-08-17 09:44:12.85796 aggregating flows
+
+ ## 2023-08-17 09:44:15.095431 rejoining segments into linestrings
+
+ ## Warning: Some expressions had a GC in every iteration; so filtering is
+ ## disabled.
+
+ ## # A tibble: 2 × 6
+ ## expression min median `itr/sec` mem_alloc `gc/sec`
+ ##
+ ## 1 old 13s 13s 0.0770 3.41GB 0.539
+ ## 2 new 12.9s 12.9s 0.0773 223.49MB 0.386
diff --git a/man/overline.Rd b/man/overline.Rd
index 3bd286f1..47605a49 100644
--- a/man/overline.Rd
+++ b/man/overline.Rd
@@ -20,7 +20,7 @@ overline2(
attrib,
ncores = 1,
simplify = TRUE,
- regionalise = 1e+09,
+ regionalise = 1e+07,
quiet = ifelse(nrow(sl) < 1000, TRUE, FALSE),
fun = sum
)