Add data_reverse() and document how to reverse-score using data_resca…

…le() (#76) * Remove deprecation plan for change_scale() I personally really like this alias and would prefer we keep it as an option (similar to `parameters()` vs `model_parameters()`). Especially inside another function call this looks much more natural to me: ```r mtcars |> dplyr::mutate(mpg_pomp = change_scale(mpg, to = c(0, 100)) ``` * Swap data_rescale for change_scale * Add reverse-scoring examples to data_rescale() * Tweak warning * Add data_reverse() * Add data_reverse.factor() * Document * fix typo * as.data.frame(sapply) -> lapply Co-authored-by: Indrajeet Patil <patilindrajeet.science@gmail.com> Co-authored-by: Daniel <mail@danielluedecke.de>
easystats · Mar 1, 2022 · 9a24180 · 9a24180
1 parent c850768
commit 9a24180
Show file tree

Hide file tree

Showing 8 changed files with 298 additions and 9 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -33,6 +33,10 @@ S3method(data_rescale,data.frame)
 S3method(data_rescale,factor)
 S3method(data_rescale,grouped_df)
 S3method(data_rescale,numeric)
+S3method(data_reverse,data.frame)
+S3method(data_reverse,factor)
+S3method(data_reverse,grouped_df)
+S3method(data_reverse,numeric)
 S3method(describe_distribution,character)
 S3method(describe_distribution,data.frame)
 S3method(describe_distribution,factor)
@@ -113,6 +117,7 @@ export(data_rename_rows)
 export(data_reorder)
 export(data_rescale)
 export(data_restoretype)
+export(data_reverse)
 export(data_rotate)
 export(data_to_long)
 export(data_to_numeric)
@@ -140,6 +145,7 @@ export(rescale_weights)
 export(reshape_ci)
 export(reshape_longer)
 export(reshape_wider)
+export(reverse_scale)
 export(skewness)
 export(smoothness)
 export(standardise)

diff --git a/R/data_rescale.R b/R/data_rescale.R
@@ -1,23 +1,31 @@
 #' Rescale Variables to a New Range
 #'
 #' Rescale variables to a new range.
+#' Can also be used to reverse-score variables (change the keying/scoring direction).
 #'
 #' @inheritParams standardize.data.frame
 #'
 #' @param x A numeric variable.
-#' @param to New range that the variable will have after rescaling.
+#' @param to Numeric vector of length 2 giving the new range that the variable will have after rescaling.
+#'   To reverse-score a variable, the range should be given with the maximum value first.
+#'   See examples.
 #' @param range Initial (old) range of values. If `NULL`, will take the range of
 #'   the input vector (`range(x)`).
 #' @param ... Arguments passed to or from other methods.
 #'
 #' @examples
 #' data_rescale(c(0, 1, 5, -5, -2))
 #' data_rescale(c(0, 1, 5, -5, -2), to = c(-5, 5))
+#' data_rescale(c(1, 2, 3, 4, 5), to = c(-2, 2))
 #'
 #' # Specify the "theoretical" range of the input vector
 #' data_rescale(c(1, 3, 4), to = c(0, 40), range = c(0, 4))
 #'
-#' # Dataframes
+#' # Reverse-score a variable
+#' data_rescale(c(1, 2, 3, 4, 5), to = c(5, 1))
+#' data_rescale(c(1, 2, 3, 4, 5), to = c(2, -2))
+#'
+#' # Data frames
 #' head(data_rescale(iris, to = c(0, 1)))
 #' head(data_rescale(iris, to = c(0, 1), select = "Sepal.Length"))
 #'
@@ -41,9 +49,7 @@ data_rescale <- function(x, ...) {
 #' @rdname data_rescale
 #' @export
 change_scale <- function(x, ...) {
-  # TODO: Don't deprecate for now
-  # so we have time to change it accross the verse, but for next round
-  # .Deprecated("data_rescale")
+  # Alias for data_rescale()
   data_rescale(x, ...)
 }
 
@@ -69,7 +75,7 @@ data_rescale.numeric <- function(x,
   # Warning if only one value
   if (length(unique(x)) == 1 && is.null(range)) {
     if (verbose) {
-      warning(paste0("A `range` must be provided for data with only one observation."))
+      warning("A `range` must be provided for data with only one unique value.")
     }
     return(x)
   }
@@ -129,7 +135,7 @@ data_rescale.grouped_df <- function(x,
 
   x <- as.data.frame(x)
   for (rows in grps) {
-    x[rows, ] <- change_scale(
+    x[rows, ] <- data_rescale(
       x[rows, ],
       select = select,
       exclude = exclude,

diff --git a/R/data_reverse.R b/R/data_reverse.R
@@ -0,0 +1,197 @@
+#' Reverse-Score Variables
+#'
+#' Reverse-score variables (change the keying/scoring direction).
+#'
+#' @inheritParams standardize.data.frame
+#'
+#' @param x A numeric or factor variable.
+#' @param range Initial (old) range of values. If `NULL`, will take the range of
+#'   the input vector (`range(x)`).
+#' @param ... Arguments passed to or from other methods.
+#'
+#' @examples
+#' data_reverse(c(1, 2, 3, 4, 5))
+#' data_reverse(c(-2, -1, 0, 2, 1))
+#'
+#' # Specify the "theoretical" range of the input vector
+#' data_reverse(c(1, 3, 4), range = c(0, 4))
+#'
+#' # Factor variables
+#' data_reverse(factor(c(1, 2, 3, 4, 5)))
+#' data_reverse(factor(c(1, 2, 3, 4, 5)), range = 0:10)
+#'
+#' # Data frames
+#' head(data_reverse(iris))
+#' head(data_reverse(iris, select = "Sepal.Length"))
+#'
+#' @return A reverse-scored object.
+#'
+#' @family transform utilities
+#'
+#' @seealso [data_rescale()] to change the score range for variables (potentially while reversing),
+#'   [normalize()] [standardize()] [ranktransform()]
+#'
+#' @export
+data_reverse <- function(x, ...) {
+  UseMethod("data_reverse")
+}
+
+
+#' @rdname data_reverse
+#' @export
+reverse_scale <- function(x, ...) {
+  # Alias for data_reverse()
+  data_reverse(x, ...)
+}
+
+
+
+
+#' @rdname data_reverse
+#' @export
+data_reverse.numeric <- function(x,
+                                 range = NULL,
+                                 verbose = TRUE,
+                                 ...) {
+
+  # Warning if all NaNs
+  if (all(is.na(x))) {
+    return(x)
+  }
+
+  # Warning if only one value
+  if (length(unique(x)) == 1 && is.null(range)) {
+    if (verbose) {
+      warning(paste0("A `range` must be provided for data with only one unique value."))
+    }
+    return(x)
+  }
+
+  if (is.null(range)) {
+    range <- c(min(x, na.rm = TRUE), max(x, na.rm = TRUE))
+  }
+
+  min <- ifelse(is.na(range[1]), min(x, na.rm = TRUE), range[1])
+  max <- ifelse(is.na(range[2]), max(x, na.rm = TRUE), range[2])
+  new_min <- max
+  new_max <- min
+
+  out <- as.vector((new_max - new_min) / (max - min) * (x - min) + new_min)
+  out
+}
+
+
+
+
+#' @export
+data_reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) {
+
+  # Warning if all NaNs
+  if (all(is.na(x))) {
+    return(x)
+  }
+
+  # Warning if only one value
+  if (length(unique(x)) == 1 && is.null(range)) {
+    if (verbose) {
+      warning("A `range` must be provided for data with only one unique value.")
+    }
+    return(x)
+  }
+
+  if (!is.null(range)) {
+    old_levels <- range
+    x <- factor(x, levels = range)
+  } else {
+    old_levels <- levels(x)
+  }
+
+  int_x <- as.integer(x)
+  rev_x <- data_reverse(int_x, range = c(1, length(old_levels)))
+  x <- factor(rev_x, levels = seq_len(length(old_levels)), labels = old_levels)
+  x
+}
+
+
+
+
+#' @rdname data_reverse
+#' @export
+data_reverse.grouped_df <- function(x,
+                                    range = NULL,
+                                    select = NULL,
+                                    exclude = NULL,
+                                    ...) {
+  info <- attributes(x)
+
+  # dplyr >= 0.8.0 returns attribute "indices"
+  grps <- attr(x, "groups", exact = TRUE)
+
+  # check for formula notation, convert to character vector
+  if (inherits(select, "formula")) {
+    select <- all.vars(select)
+  }
+  if (inherits(exclude, "formula")) {
+    exclude <- all.vars(exclude)
+  }
+
+  # dplyr < 0.8.0?
+  if (is.null(grps)) {
+    grps <- attr(x, "indices", exact = TRUE)
+    grps <- lapply(grps, function(x) x + 1)
+  } else {
+    grps <- grps[[".rows"]]
+  }
+
+  x <- as.data.frame(x)
+  for (rows in grps) {
+    x[rows, ] <- data_reverse(
+      x[rows, ],
+      select = select,
+      exclude = exclude,
+      range = range,
+      ...
+    )
+  }
+  # set back class, so data frame still works with dplyr
+  attributes(x) <- info
+  x
+}
+
+
+#' @rdname data_reverse
+#' @export
+data_reverse.data.frame <- function(x,
+                                    range = NULL,
+                                    select = NULL,
+                                    exclude = NULL,
+                                    ...) {
+
+  # check for formula notation, convert to character vector
+  if (inherits(select, "formula")) {
+    select <- all.vars(select)
+  }
+  if (inherits(exclude, "formula")) {
+    exclude <- all.vars(exclude)
+  }
+
+  if (is.null(select)) {
+    select <- names(x)
+  }
+
+  if (!is.null(exclude)) {
+    select <- setdiff(select, exclude)
+  }
+
+  # Transform the range so that it is a list now
+  if (!is.null(range)) {
+    if (!is.list(range)) {
+      range <- stats::setNames(rep(list(range), length(select)), select)
+    }
+  }
+
+  x[select] <- lapply(select, function(n) {
+    data_reverse(x[[n]], range = range[[n]])
+  })
+  x
+}
diff --git a/man/data_rescale.Rd b/man/data_rescale.Rd
diff --git a/man/data_reverse.Rd b/man/data_reverse.Rd