From 9a241805c10acf18cbc82e134fbdd83b981e93cb Mon Sep 17 00:00:00 2001 From: "Brenton M. Wiernik" Date: Tue, 1 Mar 2022 13:56:08 -0500 Subject: [PATCH] Add data_reverse() and document how to reverse-score using data_rescale() (#76) * Remove deprecation plan for change_scale() I personally really like this alias and would prefer we keep it as an option (similar to `parameters()` vs `model_parameters()`). Especially inside another function call this looks much more natural to me: ```r mtcars |> dplyr::mutate(mpg_pomp = change_scale(mpg, to = c(0, 100)) ``` * Swap data_rescale for change_scale * Add reverse-scoring examples to data_rescale() * Tweak warning * Add data_reverse() * Add data_reverse.factor() * Document * fix typo * as.data.frame(sapply) -> lapply Co-authored-by: Indrajeet Patil Co-authored-by: Daniel --- NAMESPACE | 6 ++ R/data_rescale.R | 20 +++-- R/data_reverse.R | 197 +++++++++++++++++++++++++++++++++++++++++++ man/data_rescale.Rd | 13 ++- man/data_reverse.Rd | 68 +++++++++++++++ man/normalize.Rd | 1 + man/ranktransform.Rd | 1 + man/standardize.Rd | 1 + 8 files changed, 298 insertions(+), 9 deletions(-) create mode 100644 R/data_reverse.R create mode 100644 man/data_reverse.Rd diff --git a/NAMESPACE b/NAMESPACE index d59950ec8..ef24c0924 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,6 +33,10 @@ S3method(data_rescale,data.frame) S3method(data_rescale,factor) S3method(data_rescale,grouped_df) S3method(data_rescale,numeric) +S3method(data_reverse,data.frame) +S3method(data_reverse,factor) +S3method(data_reverse,grouped_df) +S3method(data_reverse,numeric) S3method(describe_distribution,character) S3method(describe_distribution,data.frame) S3method(describe_distribution,factor) @@ -113,6 +117,7 @@ export(data_rename_rows) export(data_reorder) export(data_rescale) export(data_restoretype) +export(data_reverse) export(data_rotate) export(data_to_long) export(data_to_numeric) @@ -140,6 +145,7 @@ export(rescale_weights) export(reshape_ci) export(reshape_longer) export(reshape_wider) +export(reverse_scale) export(skewness) export(smoothness) export(standardise) diff --git a/R/data_rescale.R b/R/data_rescale.R index 0ec6d58ff..2eaa0ed9f 100644 --- a/R/data_rescale.R +++ b/R/data_rescale.R @@ -1,11 +1,14 @@ #' Rescale Variables to a New Range #' #' Rescale variables to a new range. +#' Can also be used to reverse-score variables (change the keying/scoring direction). #' #' @inheritParams standardize.data.frame #' #' @param x A numeric variable. -#' @param to New range that the variable will have after rescaling. +#' @param to Numeric vector of length 2 giving the new range that the variable will have after rescaling. +#' To reverse-score a variable, the range should be given with the maximum value first. +#' See examples. #' @param range Initial (old) range of values. If `NULL`, will take the range of #' the input vector (`range(x)`). #' @param ... Arguments passed to or from other methods. @@ -13,11 +16,16 @@ #' @examples #' data_rescale(c(0, 1, 5, -5, -2)) #' data_rescale(c(0, 1, 5, -5, -2), to = c(-5, 5)) +#' data_rescale(c(1, 2, 3, 4, 5), to = c(-2, 2)) #' #' # Specify the "theoretical" range of the input vector #' data_rescale(c(1, 3, 4), to = c(0, 40), range = c(0, 4)) #' -#' # Dataframes +#' # Reverse-score a variable +#' data_rescale(c(1, 2, 3, 4, 5), to = c(5, 1)) +#' data_rescale(c(1, 2, 3, 4, 5), to = c(2, -2)) +#' +#' # Data frames #' head(data_rescale(iris, to = c(0, 1))) #' head(data_rescale(iris, to = c(0, 1), select = "Sepal.Length")) #' @@ -41,9 +49,7 @@ data_rescale <- function(x, ...) { #' @rdname data_rescale #' @export change_scale <- function(x, ...) { - # TODO: Don't deprecate for now - # so we have time to change it accross the verse, but for next round - # .Deprecated("data_rescale") + # Alias for data_rescale() data_rescale(x, ...) } @@ -69,7 +75,7 @@ data_rescale.numeric <- function(x, # Warning if only one value if (length(unique(x)) == 1 && is.null(range)) { if (verbose) { - warning(paste0("A `range` must be provided for data with only one observation.")) + warning("A `range` must be provided for data with only one unique value.") } return(x) } @@ -129,7 +135,7 @@ data_rescale.grouped_df <- function(x, x <- as.data.frame(x) for (rows in grps) { - x[rows, ] <- change_scale( + x[rows, ] <- data_rescale( x[rows, ], select = select, exclude = exclude, diff --git a/R/data_reverse.R b/R/data_reverse.R new file mode 100644 index 000000000..07ada4c1c --- /dev/null +++ b/R/data_reverse.R @@ -0,0 +1,197 @@ +#' Reverse-Score Variables +#' +#' Reverse-score variables (change the keying/scoring direction). +#' +#' @inheritParams standardize.data.frame +#' +#' @param x A numeric or factor variable. +#' @param range Initial (old) range of values. If `NULL`, will take the range of +#' the input vector (`range(x)`). +#' @param ... Arguments passed to or from other methods. +#' +#' @examples +#' data_reverse(c(1, 2, 3, 4, 5)) +#' data_reverse(c(-2, -1, 0, 2, 1)) +#' +#' # Specify the "theoretical" range of the input vector +#' data_reverse(c(1, 3, 4), range = c(0, 4)) +#' +#' # Factor variables +#' data_reverse(factor(c(1, 2, 3, 4, 5))) +#' data_reverse(factor(c(1, 2, 3, 4, 5)), range = 0:10) +#' +#' # Data frames +#' head(data_reverse(iris)) +#' head(data_reverse(iris, select = "Sepal.Length")) +#' +#' @return A reverse-scored object. +#' +#' @family transform utilities +#' +#' @seealso [data_rescale()] to change the score range for variables (potentially while reversing), +#' [normalize()] [standardize()] [ranktransform()] +#' +#' @export +data_reverse <- function(x, ...) { + UseMethod("data_reverse") +} + + +#' @rdname data_reverse +#' @export +reverse_scale <- function(x, ...) { + # Alias for data_reverse() + data_reverse(x, ...) +} + + + + +#' @rdname data_reverse +#' @export +data_reverse.numeric <- function(x, + range = NULL, + verbose = TRUE, + ...) { + + # Warning if all NaNs + if (all(is.na(x))) { + return(x) + } + + # Warning if only one value + if (length(unique(x)) == 1 && is.null(range)) { + if (verbose) { + warning(paste0("A `range` must be provided for data with only one unique value.")) + } + return(x) + } + + if (is.null(range)) { + range <- c(min(x, na.rm = TRUE), max(x, na.rm = TRUE)) + } + + min <- ifelse(is.na(range[1]), min(x, na.rm = TRUE), range[1]) + max <- ifelse(is.na(range[2]), max(x, na.rm = TRUE), range[2]) + new_min <- max + new_max <- min + + out <- as.vector((new_max - new_min) / (max - min) * (x - min) + new_min) + out +} + + + + +#' @export +data_reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) { + + # Warning if all NaNs + if (all(is.na(x))) { + return(x) + } + + # Warning if only one value + if (length(unique(x)) == 1 && is.null(range)) { + if (verbose) { + warning("A `range` must be provided for data with only one unique value.") + } + return(x) + } + + if (!is.null(range)) { + old_levels <- range + x <- factor(x, levels = range) + } else { + old_levels <- levels(x) + } + + int_x <- as.integer(x) + rev_x <- data_reverse(int_x, range = c(1, length(old_levels))) + x <- factor(rev_x, levels = seq_len(length(old_levels)), labels = old_levels) + x +} + + + + +#' @rdname data_reverse +#' @export +data_reverse.grouped_df <- function(x, + range = NULL, + select = NULL, + exclude = NULL, + ...) { + info <- attributes(x) + + # dplyr >= 0.8.0 returns attribute "indices" + grps <- attr(x, "groups", exact = TRUE) + + # check for formula notation, convert to character vector + if (inherits(select, "formula")) { + select <- all.vars(select) + } + if (inherits(exclude, "formula")) { + exclude <- all.vars(exclude) + } + + # dplyr < 0.8.0? + if (is.null(grps)) { + grps <- attr(x, "indices", exact = TRUE) + grps <- lapply(grps, function(x) x + 1) + } else { + grps <- grps[[".rows"]] + } + + x <- as.data.frame(x) + for (rows in grps) { + x[rows, ] <- data_reverse( + x[rows, ], + select = select, + exclude = exclude, + range = range, + ... + ) + } + # set back class, so data frame still works with dplyr + attributes(x) <- info + x +} + + +#' @rdname data_reverse +#' @export +data_reverse.data.frame <- function(x, + range = NULL, + select = NULL, + exclude = NULL, + ...) { + + # check for formula notation, convert to character vector + if (inherits(select, "formula")) { + select <- all.vars(select) + } + if (inherits(exclude, "formula")) { + exclude <- all.vars(exclude) + } + + if (is.null(select)) { + select <- names(x) + } + + if (!is.null(exclude)) { + select <- setdiff(select, exclude) + } + + # Transform the range so that it is a list now + if (!is.null(range)) { + if (!is.list(range)) { + range <- stats::setNames(rep(list(range), length(select)), select) + } + } + + x[select] <- lapply(select, function(n) { + data_reverse(x[[n]], range = range[[n]]) + }) + x +} diff --git a/man/data_rescale.Rd b/man/data_rescale.Rd index a02a3adc9..b6de38943 100644 --- a/man/data_rescale.Rd +++ b/man/data_rescale.Rd @@ -37,7 +37,9 @@ change_scale(x, ...) \item{...}{Arguments passed to or from other methods.} -\item{to}{New range that the variable will have after rescaling.} +\item{to}{Numeric vector of length 2 giving the new range that the variable will have after rescaling. +To reverse-score a variable, the range should be given with the maximum value first. +See examples.} \item{range}{Initial (old) range of values. If \code{NULL}, will take the range of the input vector (\code{range(x)}).} @@ -54,15 +56,21 @@ A rescaled object. } \description{ Rescale variables to a new range. +Can also be used to reverse-score variables (change the keying/scoring direction). } \examples{ data_rescale(c(0, 1, 5, -5, -2)) data_rescale(c(0, 1, 5, -5, -2), to = c(-5, 5)) +data_rescale(c(1, 2, 3, 4, 5), to = c(-2, 2)) # Specify the "theoretical" range of the input vector data_rescale(c(1, 3, 4), to = c(0, 40), range = c(0, 4)) -# Dataframes +# Reverse-score a variable +data_rescale(c(1, 2, 3, 4, 5), to = c(5, 1)) +data_rescale(c(1, 2, 3, 4, 5), to = c(2, -2)) + +# Data frames head(data_rescale(iris, to = c(0, 1))) head(data_rescale(iris, to = c(0, 1), select = "Sepal.Length")) @@ -76,6 +84,7 @@ head(data_rescale(iris, to = list( \code{\link[=normalize]{normalize()}} \code{\link[=standardize]{standardize()}} \code{\link[=ranktransform]{ranktransform()}} Other transform utilities: +\code{\link{data_reverse}()}, \code{\link{normalize}()}, \code{\link{ranktransform}()}, \code{\link{standardize}()} diff --git a/man/data_reverse.Rd b/man/data_reverse.Rd new file mode 100644 index 000000000..30e93a0c7 --- /dev/null +++ b/man/data_reverse.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data_reverse.R +\name{data_reverse} +\alias{data_reverse} +\alias{reverse_scale} +\alias{data_reverse.numeric} +\alias{data_reverse.grouped_df} +\alias{data_reverse.data.frame} +\title{Reverse-Score Variables} +\usage{ +data_reverse(x, ...) + +reverse_scale(x, ...) + +\method{data_reverse}{numeric}(x, range = NULL, verbose = TRUE, ...) + +\method{data_reverse}{grouped_df}(x, range = NULL, select = NULL, exclude = NULL, ...) + +\method{data_reverse}{data.frame}(x, range = NULL, select = NULL, exclude = NULL, ...) +} +\arguments{ +\item{x}{A numeric or factor variable.} + +\item{...}{Arguments passed to or from other methods.} + +\item{range}{Initial (old) range of values. If \code{NULL}, will take the range of +the input vector (\code{range(x)}).} + +\item{verbose}{Toggle warnings and messages on or off.} + +\item{select}{Character vector of column names. If \code{NULL} (the default), all +variables will be selected.} + +\item{exclude}{Character vector of column names to be excluded from selection.} +} +\value{ +A reverse-scored object. +} +\description{ +Reverse-score variables (change the keying/scoring direction). +} +\examples{ +data_reverse(c(1, 2, 3, 4, 5)) +data_reverse(c(-2, -1, 0, 2, 1)) + +# Specify the "theoretical" range of the input vector +data_reverse(c(1, 3, 4), range = c(0, 4)) + +# Factor variables +data_reverse(factor(c(1, 2, 3, 4, 5))) +data_reverse(factor(c(1, 2, 3, 4, 5)), range = 0:10) + +# Data frames +head(data_reverse(iris)) +head(data_reverse(iris, select = "Sepal.Length")) + +} +\seealso{ +\code{\link[=data_rescale]{data_rescale()}} to change the score range for variables (potentially while reversing), +\code{\link[=normalize]{normalize()}} \code{\link[=standardize]{standardize()}} \code{\link[=ranktransform]{ranktransform()}} + +Other transform utilities: +\code{\link{data_rescale}()}, +\code{\link{normalize}()}, +\code{\link{ranktransform}()}, +\code{\link{standardize}()} +} +\concept{transform utilities} diff --git a/man/normalize.Rd b/man/normalize.Rd index bc372e757..d53044159 100644 --- a/man/normalize.Rd +++ b/man/normalize.Rd @@ -81,6 +81,7 @@ Regression with Beta-Distributed Dependent Variables. Psychological Methods, \seealso{ Other transform utilities: \code{\link{data_rescale}()}, +\code{\link{data_reverse}()}, \code{\link{ranktransform}()}, \code{\link{standardize}()} } diff --git a/man/ranktransform.Rd b/man/ranktransform.Rd index dff5f43ce..0d4a67336 100644 --- a/man/ranktransform.Rd +++ b/man/ranktransform.Rd @@ -65,6 +65,7 @@ head(ranktransform(trees)) \seealso{ Other transform utilities: \code{\link{data_rescale}()}, +\code{\link{data_reverse}()}, \code{\link{normalize}()}, \code{\link{standardize}()} } diff --git a/man/standardize.Rd b/man/standardize.Rd index e0aed58e4..06009703c 100644 --- a/man/standardize.Rd +++ b/man/standardize.Rd @@ -215,6 +215,7 @@ See \code{\link[=center]{center()}} for grand-mean centering of variables. Other transform utilities: \code{\link{data_rescale}()}, +\code{\link{data_reverse}()}, \code{\link{normalize}()}, \code{\link{ranktransform}()} }