diff --git a/.travis.yml b/.travis.yml index f3be8bb..a349b99 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,10 +2,11 @@ language: R matrix: include: - r: release - env: - - HUTILS_BENCHMARK=TRUE + env: + - HUTILS_BENCHMARK=TRUE - r: release - - HUTILS_BENCHMARK=FALSE + env: + - HUTILS_BENCHMARK=FALSE - r: devel cache: packages latex: true diff --git a/R/auc.R b/R/auc.R index 17becad..b354b7b 100644 --- a/R/auc.R +++ b/R/auc.R @@ -6,7 +6,7 @@ #' an ordered factor, or the unique values are \code{FALSE} and \code{TRUE} (case-insensitively). #' Anything else is an error. #' @param pred Numeric (double) vector the same length as \code{actual} giving the predicted probability of \code{TRUE}. Must be a numeric vector the same length as \code{actual}. -#' @source Source code based on \code{\link[Metrics]{auc}} from Ben Hamner and Michael Frasco and Erin LeDell from the Metrics package. +#' @source Source code based on \code{Metrics::auc} from Ben Hamner and Michael Frasco and Erin LeDell from the Metrics package. #' @export auc #' @author @@ -30,7 +30,8 @@ auc <- function(actual, pred) { if (!is.double(pred)) { # Switch the values if (is.logical(pred) && is.double(actual)) { - # Immeediate return + # Immediate return + message("`pred` was type logical and `actual` was type double, so interpreting as auc(pred, actual). 
Set to auc(actual, pred) for standard behaviour.") return(auc(actual = pred, pred = actual)) } diff --git a/inst/doc/hutils.R b/inst/doc/hutils.R index b45bb79..d920417 100644 --- a/inst/doc/hutils.R +++ b/inst/doc/hutils.R @@ -39,9 +39,8 @@ na <- sample(letters, size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::if_else(cnd, yes, no, na), hutils = hutils::if_else(cnd, yes, no, na), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print cnd <- sample(c(TRUE, FALSE, NA), size = 100e3, replace = TRUE) yes <- sample(letters, size = 1, replace = TRUE) @@ -50,9 +49,8 @@ na <- sample(letters, size = 1, replace = TRUE) microbenchmark(dplyr = dplyr::if_else(cnd, yes, no, na), hutils = hutils::if_else(cnd, yes, no, na), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ## ----compare_coalesce---------------------------------------------------- x <- sample(c(letters, NA), size = 100e3, replace = TRUE) @@ -62,18 +60,16 @@ C <- sample(c(letters, NA), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ## ----compare_coalesce_short_circuit_x------------------------------------ x <- sample(c(letters), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ## ----compare_coalesce_short_circuit_A------------------------------------ x <- sample(c(letters, NA), size = 100e3, replace = TRUE) @@ -81,9 +77,8 @@ A <- sample(c(letters), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ## ----canonical_drop_DT--------------------------------------------------- DT <- data.table(A = 1:5, B 
= 1:5, C = 1:5) diff --git a/inst/doc/hutils.Rmd b/inst/doc/hutils.Rmd index 31fe5be..9cd722e 100644 --- a/inst/doc/hutils.Rmd +++ b/inst/doc/hutils.Rmd @@ -87,9 +87,8 @@ na <- sample(letters, size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::if_else(cnd, yes, no, na), hutils = hutils::if_else(cnd, yes, no, na), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print cnd <- sample(c(TRUE, FALSE, NA), size = 100e3, replace = TRUE) yes <- sample(letters, size = 1, replace = TRUE) @@ -98,9 +97,8 @@ na <- sample(letters, size = 1, replace = TRUE) microbenchmark(dplyr = dplyr::if_else(cnd, yes, no, na), hutils = hutils::if_else(cnd, yes, no, na), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ``` This speed advantage also appears to be true of `coalesce`: @@ -113,9 +111,8 @@ C <- sample(c(letters, NA), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ``` especially during short-circuits: @@ -125,9 +122,8 @@ x <- sample(c(letters), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ``` ```{r compare_coalesce_short_circuit_A} @@ -136,9 +132,8 @@ A <- sample(c(letters), size = 100e3, replace = TRUE) microbenchmark(dplyr = dplyr::coalesce(x, A, B, C), hutils = hutils::coalesce(x, A, B, C), - check = my_check) %T>% - print %>% - autoplot + check = my_check) %>% + print ``` diff --git a/inst/doc/hutils.html b/inst/doc/hutils.html index c4fbdf4..c4304a5 100644 --- a/inst/doc/hutils.html +++ b/inst/doc/hutils.html @@ -12,7 +12,7 @@ - +
hutils
package## data.table 1.11.0
-## The fastest way to learn (by data.table authors): https://www.datacamp.com/courses/data-analysis-the-data-table-way
-## Documentation: ?data.table, example(data.table) and browseVignettes("data.table")
-## Release notes, videos and slides: http://r-datatable.com
These are simple additions to `magrittr`'s aliases, including: capitalized forms of `and` and `or` that invoke `&&` and `||` (the 'long-form' logical operators) and `nor` / `neither` functions.
coalesce
and if_else
## Unit: milliseconds
-## expr min lq mean median uq max neval cld
-## dplyr 5.741339 6.519730 10.106987 6.809764 11.375758 154.1358 100 b
-## hutils 1.967590 2.277802 3.541882 2.332767 2.485915 59.7201 100 a
-
+## expr min lq mean median uq max neval cld
+## dplyr 5.336251 6.632064 10.313847 7.036243 12.16995 168.55931 100 b
+## hutils 1.951324 2.073151 2.702137 2.398271 2.51332 14.38661 100 a
cnd <- sample(c(TRUE, FALSE, NA), size = 100e3, replace = TRUE)
yes <- sample(letters, size = 1, replace = TRUE)
no <- sample(letters, size = 100e3, replace = TRUE)
@@ -147,14 +141,12 @@ coalesce
and if_else
microbenchmark(dplyr = dplyr::if_else(cnd, yes, no, na),
hutils = hutils::if_else(cnd, yes, no, na),
- check = my_check) %T>%
- print %>%
- autoplot
## Unit: milliseconds
-## expr min lq mean median uq max neval cld
-## dplyr 4.985685 5.363662 7.245224 5.404472 10.556256 15.006147 100 b
-## hutils 1.172181 1.396708 1.874132 1.419448 1.442187 8.039921 100 a
-
+## expr min lq mean median uq max neval cld
+## dplyr 4.187562 4.960684 6.705858 5.545116 5.955771 13.898406 100 b
+## hutils 1.167061 1.264641 1.716801 1.455437 1.513413 8.251943 100 a
This speed advantage also appears to be true of `coalesce`:
x <- sample(c(letters, NA), size = 100e3, replace = TRUE)
A <- sample(c(letters, NA), size = 100e3, replace = TRUE)
@@ -163,43 +155,40 @@ coalesce
and if_else
microbenchmark(dplyr = dplyr::coalesce(x, A, B, C),
hutils = hutils::coalesce(x, A, B, C),
- check = my_check) %T>%
- print %>%
- autoplot
## Unit: milliseconds
-## expr min lq mean median uq max neval cld
-## dplyr 2.343760 2.695083 5.814904 2.726406 7.818556 148.89049 100 b
-## hutils 1.000812 1.258770 2.092449 1.284369 1.315390 12.13834 100 a
-
+ check = my_check) %>%
+ print
+## Unit: microseconds
+## expr min lq mean median uq max neval cld
+## dplyr 1885.668 2686.046 4078.867 2770.525 6241.738 9323.229 100 a
+## hutils 993.582 1236.481 3475.583 1275.182 1333.310 167157.033 100 a
especially during short-circuits:
x <- sample(c(letters), size = 100e3, replace = TRUE)
microbenchmark(dplyr = dplyr::coalesce(x, A, B, C),
hutils = hutils::coalesce(x, A, B, C),
- check = my_check) %T>%
- print %>%
- autoplot
## Unit: microseconds
-## expr min lq mean median uq max neval
-## dplyr 2081.736 2536.0615 5728.88782 2574.914 7626.5565 149937.386 100
-## hutils 58.730 61.2905 74.99327 67.464 83.1255 160.527 100
+## expr min lq mean median uq max neval
+## dplyr 1690.807 2386.0735 4259.60067 2578.6760 2669.481 60602.191 100
+## hutils 58.429 60.9885 71.95122 65.8065 79.511 127.699 100
## cld
## b
## a
-
x <- sample(c(letters, NA), size = 100e3, replace = TRUE)
A <- sample(c(letters), size = 100e3, replace = TRUE)
microbenchmark(dplyr = dplyr::coalesce(x, A, B, C),
hutils = hutils::coalesce(x, A, B, C),
- check = my_check) %T>%
- print %>%
- autoplot
## Unit: microseconds
-## expr min lq mean median uq max neval cld
-## dplyr 2303.402 2681.681 4545.0994 2726.5565 7743.262 9070.248 100 b
-## hutils 574.043 745.564 978.0727 764.9905 790.289 6912.616 100 a
-
+## expr min lq mean median uq max neval
+## dplyr 1813.386 2536.0590 3670.791 2721.5840 2813.8950 8877.487 100
+## hutils 567.718 680.0575 2681.907 764.5375 796.7635 165714.396 100
+## cld
+## a
+## a
rm(DT1, DT2)