diff --git a/.travis.yml b/.travis.yml index f3be8bb..a349b99 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,10 +2,11 @@ language: R matrix: include: - r: release - env: - - HUTILS_BENCHMARK=TRUE + env: + - HUTILS_BENCHMARK=TRUE - r: release - - HUTILS_BENCHMARK=FALSE + env: + - HUTILS_BENCHMARK=FALSE - r: devel cache: packages latex: true diff --git a/R/auc.R b/R/auc.R index 17becad..b354b7b 100644 --- a/R/auc.R +++ b/R/auc.R @@ -6,7 +6,7 @@ #' an ordered factor, or the unique values are \code{FALSE} and \code{TRUE} (case-insensitively). #' Anything else is an error. #' @param pred Numeric (double) vector the same length as \code{actual} giving the predicted probability of \code{TRUE}. Must be a numeric vector the same length as \code{actual}. -#' @source Source code based on \code{\link[Metrics]{auc}} from Ben Hamner and Michael Frasco and Erin LeDell from the Metrics package. +#' @source Source code based on \code{Metrics::auc} from Ben Hamner and Michael Frasco and Erin LeDell from the Metrics package. #' @export auc #' @author @@ -30,7 +30,8 @@ auc <- function(actual, pred) { if (!is.double(pred)) { # Switch the values if (is.logical(pred) && is.double(actual)) { - # Immeediate return + # Immediate return + message("`pred` was type logical and `actual` was type double, so interpreting as auc(pred, actual). 
Set to auc(actual, pred) for standard behaviour.") return(auc(actual = pred, pred = actual)) } diff --git a/inst/doc/hutils.R b/inst/doc/hutils.R index b45bb79..d920417 100644 --- a/inst/doc/hutils.R +++ b/inst/doc/hutils.R @@ -39,9 +39,8 @@ na <- sample(letters, size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::if_else(cnd, yes, no, na), hutils = hutils::if_else(cnd, yes, no, na), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print cnd <- sample(c(TRUE, FALSE, NA), size = 100e3, replace = TRUE) yes <- sample(letters, size = 1, replace = TRUE) @@ -50,9 +49,8 @@ na <- sample(letters, size = 1, replace = TRUE) microbenchmark(dplyr = dplyr::if_else(cnd, yes, no, na), hutils = hutils::if_else(cnd, yes, no, na), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ## ----compare_coalesce---------------------------------------------------- x <- sample(c(letters, NA), size = 100e3, replace = TRUE) @@ -62,18 +60,16 @@ C <- sample(c(letters, NA), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ## ----compare_coalesce_short_circuit_x------------------------------------ x <- sample(c(letters), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ## ----compare_coalesce_short_circuit_A------------------------------------ x <- sample(c(letters, NA), size = 100e3, replace = TRUE) @@ -81,9 +77,8 @@ A <- sample(c(letters), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ## ----canonical_drop_DT--------------------------------------------------- DT <- data.table(A = 1:5, B 
= 1:5, C = 1:5) diff --git a/inst/doc/hutils.Rmd b/inst/doc/hutils.Rmd index 31fe5be..9cd722e 100644 --- a/inst/doc/hutils.Rmd +++ b/inst/doc/hutils.Rmd @@ -87,9 +87,8 @@ na <- sample(letters, size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::if_else(cnd, yes, no, na), hutils = hutils::if_else(cnd, yes, no, na), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print cnd <- sample(c(TRUE, FALSE, NA), size = 100e3, replace = TRUE) yes <- sample(letters, size = 1, replace = TRUE) @@ -98,9 +97,8 @@ na <- sample(letters, size = 1, replace = TRUE) microbenchmark(dplyr = dplyr::if_else(cnd, yes, no, na), hutils = hutils::if_else(cnd, yes, no, na), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ``` This speed advantage also appears to be true of `coalesce`: @@ -113,9 +111,8 @@ C <- sample(c(letters, NA), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ``` especially during short-circuits: @@ -125,9 +122,8 @@ x <- sample(c(letters), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ``` ```{r compare_coalesce_short_circuit_A} @@ -136,9 +132,8 @@ A <- sample(c(letters), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ``` diff --git a/inst/doc/hutils.html b/inst/doc/hutils.html index c4fbdf4..c4304a5 100644 --- a/inst/doc/hutils.html +++ b/inst/doc/hutils.html @@ -12,7 +12,7 @@ - + Hutils @@ -70,7 +70,7 @@

Hutils

Hugh Parsonage

-

2018-05-04

+

2018-05-12

@@ -96,10 +96,6 @@

hutils package

error = function(e) { opts_chunk$set(eval = FALSE) }) -
## data.table 1.11.0
-
##   The fastest way to learn (by data.table authors): https://www.datacamp.com/courses/data-analysis-the-data-table-way
-
##   Documentation: ?data.table, example(data.table) and browseVignettes("data.table")
-
##   Release notes, videos and slides: http://r-datatable.com

Aliases

These are simple additions to `magrittr`'s aliases, including: capitalized forms of `and` and `or` that invoke `&&` and `||` (the 'long-form' logical operators) and `nor` / `neither` functions.

@@ -132,14 +128,12 @@

coalesce and if_else

microbenchmark(dplyr = dplyr::if_else(cnd, yes, no, na), hutils = hutils::if_else(cnd, yes, no, na), - check = my_check) %T>% - print %>% - autoplot
+ check = my_check) %>% + print
## Unit: milliseconds
-##    expr      min       lq      mean   median        uq      max neval cld
-##   dplyr 5.741339 6.519730 10.106987 6.809764 11.375758 154.1358   100   b
-##  hutils 1.967590 2.277802  3.541882 2.332767  2.485915  59.7201   100  a
-

+## expr min lq mean median uq max neval cld +## dplyr 5.336251 6.632064 10.313847 7.036243 12.16995 168.55931 100 b +## hutils 1.951324 2.073151 2.702137 2.398271 2.51332 14.38661 100 a
cnd <- sample(c(TRUE, FALSE, NA), size = 100e3, replace = TRUE)
 yes <- sample(letters, size = 1, replace = TRUE)
 no <- sample(letters, size = 100e3, replace = TRUE)
@@ -147,14 +141,12 @@ 

coalesce and if_else

microbenchmark(dplyr = dplyr::if_else(cnd, yes, no, na), hutils = hutils::if_else(cnd, yes, no, na), - check = my_check) %T>% - print %>% - autoplot
+ check = my_check) %>% + print
## Unit: milliseconds
-##    expr      min       lq     mean   median        uq       max neval cld
-##   dplyr 4.985685 5.363662 7.245224 5.404472 10.556256 15.006147   100   b
-##  hutils 1.172181 1.396708 1.874132 1.419448  1.442187  8.039921   100  a
-

+## expr min lq mean median uq max neval cld +## dplyr 4.187562 4.960684 6.705858 5.545116 5.955771 13.898406 100 b +## hutils 1.167061 1.264641 1.716801 1.455437 1.513413 8.251943 100 a

This speed advantage also appears to be true of coalesce:

x <- sample(c(letters, NA), size = 100e3, replace = TRUE)
 A <- sample(c(letters, NA), size = 100e3, replace = TRUE)
@@ -163,43 +155,40 @@ 

coalesce and if_else

microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot
-
## Unit: milliseconds
-##    expr      min       lq     mean   median       uq       max neval cld
-##   dplyr 2.343760 2.695083 5.814904 2.726406 7.818556 148.89049   100   b
-##  hutils 1.000812 1.258770 2.092449 1.284369 1.315390  12.13834   100  a
-

+ check = my_check) %>% + print +
## Unit: microseconds
+##    expr      min       lq     mean   median       uq        max neval cld
+##   dplyr 1885.668 2686.046 4078.867 2770.525 6241.738   9323.229   100   a
+##  hutils  993.582 1236.481 3475.583 1275.182 1333.310 167157.033   100   a

especially during short-circuits:

x <- sample(c(letters), size = 100e3, replace = TRUE)
 
 microbenchmark(dplyr =  dplyr::coalesce(x, A, B, C),
                hutils = hutils::coalesce(x, A, B, C),
-               check = my_check) %T>%
-  print %>%
-  autoplot
+ check = my_check) %>% + print
## Unit: microseconds
-##    expr      min        lq       mean   median        uq        max neval
-##   dplyr 2081.736 2536.0615 5728.88782 2574.914 7626.5565 149937.386   100
-##  hutils   58.730   61.2905   74.99327   67.464   83.1255    160.527   100
+##    expr      min        lq       mean    median       uq       max neval
+##   dplyr 1690.807 2386.0735 4259.60067 2578.6760 2669.481 60602.191   100
+##  hutils   58.429   60.9885   71.95122   65.8065   79.511   127.699   100
 ##  cld
 ##    b
 ##   a
-

x <- sample(c(letters, NA), size = 100e3, replace = TRUE)
 A <- sample(c(letters), size = 100e3, replace = TRUE)
 
 microbenchmark(dplyr =  dplyr::coalesce(x, A, B, C),
                hutils = hutils::coalesce(x, A, B, C),
-               check = my_check) %T>%
-  print %>%
-  autoplot
+ check = my_check) %>% + print
## Unit: microseconds
-##    expr      min       lq      mean    median       uq      max neval cld
-##   dplyr 2303.402 2681.681 4545.0994 2726.5565 7743.262 9070.248   100   b
-##  hutils  574.043  745.564  978.0727  764.9905  790.289 6912.616   100  a
-

+## expr min lq mean median uq max neval +## dplyr 1813.386 2536.0590 3670.791 2721.5840 2813.8950 8877.487 100 +## hutils 567.718 680.0575 2681.907 764.5375 796.7635 165714.396 100 +## cld +## a +## a

Drop columns

@@ -465,9 +454,9 @@

Haversine distance

## expr ## DT1[, `:=`(distance, haversine_distance(lat_orig, lon_orig, lat_dest, lon_dest))] ## DT2[, `:=`(distance, distHaversine(cbind(lon_orig, lat_orig), cbind(lon_orig, lat_orig)))] -## min lq mean median uq max neval cld -## 20.92458 21.06041 22.23995 21.13359 21.25361 35.10851 100 a -## 37.78748 38.29225 49.01212 46.64103 49.33160 199.04370 100 b +## min lq mean median uq max neval cld +## 21.24983 21.38235 22.75108 21.4730 21.57962 80.80091 100 a +## 37.65071 38.64655 48.38211 47.0524 49.93059 215.68083 100 b
rm(DT1, DT2)
diff --git a/man/auc.Rd b/man/auc.Rd index c398ffe..a6e3c05 100644 --- a/man/auc.Rd +++ b/man/auc.Rd @@ -4,7 +4,7 @@ \alias{auc} \title{AUC} \source{ -Source code based on \code{\link[Metrics]{auc}} from Ben Hamner and Michael Frasco and Erin LeDell from the Metrics package. +Source code based on \code{Metrics::auc} from Ben Hamner and Michael Frasco and Erin LeDell from the Metrics package. } \usage{ auc(actual, pred) diff --git a/tests/testthat/test_benchmarks.R b/tests/testthat/test_benchmarks.R index fbb14da..14f4d4d 100644 --- a/tests/testthat/test_benchmarks.R +++ b/tests/testthat/test_benchmarks.R @@ -5,15 +5,19 @@ test_that("Benchmarks", { skip_if_not(identical(Sys.getenv("HUTILS_BENCHMARK"), "TRUE")) library(microbenchmark) invisible(gc(FALSE, reset = TRUE, full = TRUE)) - z <- as.logical(1:10 %% 3) - len10 <- microbenchmark(if_else(z, 1, 2)) - len10 <- microbenchmark(if_else(z, 1, 2)) - expect_lt(median(len10$time), 4500) z <- as.logical(1:1e5 %% 3) - len10 <- microbenchmark(if_else(z, 1:1e5, 1:1e5)) - expect_lt(median(len10$time), 1e6) + len1e5c <- microbenchmark(if_else(z, "a", rep_len(letters, 1e5))) + len1e5c_ifelse <- microbenchmark(base::ifelse(z, "a", rep_len(letters, 1e5))) + expect_lt(median(len1e5c$time) / median(len1e5c_ifelse$time), 0.1) + + z <- as.logical(1:1e4 %% 9) + z[1:1e4 %% 8 == 3] <- NA + len1e4c <- microbenchmark(if_else(z, "a", rep_len(letters, 1e4))) + len1e4c_ifelse <- microbenchmark(base::ifelse(z, "a", rep_len(letters, 1e4))) + expect_lt(median(len1e4c$time), 2e6) }) + diff --git a/vignettes/hutils.Rmd b/vignettes/hutils.Rmd index 31fe5be..9cd722e 100644 --- a/vignettes/hutils.Rmd +++ b/vignettes/hutils.Rmd @@ -87,9 +87,8 @@ na <- sample(letters, size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::if_else(cnd, yes, no, na), hutils = hutils::if_else(cnd, yes, no, na), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print cnd <- sample(c(TRUE, FALSE, NA), size = 100e3, replace = TRUE) yes <- 
sample(letters, size = 1, replace = TRUE) @@ -98,9 +97,8 @@ na <- sample(letters, size = 1, replace = TRUE) microbenchmark(dplyr = dplyr::if_else(cnd, yes, no, na), hutils = hutils::if_else(cnd, yes, no, na), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ``` This speed advantage also appears to be true of `coalesce`: @@ -113,9 +111,8 @@ C <- sample(c(letters, NA), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ``` especially during short-circuits: @@ -125,9 +122,8 @@ x <- sample(c(letters), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ``` ```{r compare_coalesce_short_circuit_A} @@ -136,9 +132,8 @@ A <- sample(c(letters), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ```