Skip to content

Commit

Permalink
Add data_reverse() and document how to reverse-score using data_resca…
Browse files Browse the repository at this point in the history
…le() (#76)

* Remove deprecation plan for change_scale()

I personally really like this alias and would prefer we keep it as an option (similar to `parameters()` vs `model_parameters()`).

Especially inside another function call this looks much more natural to me:

```r
mtcars |>
  dplyr::mutate(mpg_pomp = change_scale(mpg, to = c(0, 100))
```

* Swap data_rescale for change_scale

* Add reverse-scoring examples to data_rescale()

* Tweak warning

* Add data_reverse()

* Add data_reverse.factor()

* Document

* fix typo

* as.data.frame(sapply) -> lapply

Co-authored-by: Indrajeet Patil <patilindrajeet.science@gmail.com>
Co-authored-by: Daniel <mail@danielluedecke.de>
  • Loading branch information
3 people authored Mar 1, 2022
1 parent c850768 commit 9a24180
Show file tree
Hide file tree
Showing 8 changed files with 298 additions and 9 deletions.
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ S3method(data_rescale,data.frame)
S3method(data_rescale,factor)
S3method(data_rescale,grouped_df)
S3method(data_rescale,numeric)
S3method(data_reverse,data.frame)
S3method(data_reverse,factor)
S3method(data_reverse,grouped_df)
S3method(data_reverse,numeric)
S3method(describe_distribution,character)
S3method(describe_distribution,data.frame)
S3method(describe_distribution,factor)
Expand Down Expand Up @@ -113,6 +117,7 @@ export(data_rename_rows)
export(data_reorder)
export(data_rescale)
export(data_restoretype)
export(data_reverse)
export(data_rotate)
export(data_to_long)
export(data_to_numeric)
Expand Down Expand Up @@ -140,6 +145,7 @@ export(rescale_weights)
export(reshape_ci)
export(reshape_longer)
export(reshape_wider)
export(reverse_scale)
export(skewness)
export(smoothness)
export(standardise)
Expand Down
20 changes: 13 additions & 7 deletions R/data_rescale.R
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
#' Rescale Variables to a New Range
#'
#' Rescale variables to a new range.
#' Can also be used to reverse-score variables (change the keying/scoring direction).
#'
#' @inheritParams standardize.data.frame
#'
#' @param x A numeric variable.
#' @param to New range that the variable will have after rescaling.
#' @param to Numeric vector of length 2 giving the new range that the variable will have after rescaling.
#' To reverse-score a variable, the range should be given with the maximum value first.
#' See examples.
#' @param range Initial (old) range of values. If `NULL`, will take the range of
#' the input vector (`range(x)`).
#' @param ... Arguments passed to or from other methods.
#'
#' @examples
#' data_rescale(c(0, 1, 5, -5, -2))
#' data_rescale(c(0, 1, 5, -5, -2), to = c(-5, 5))
#' data_rescale(c(1, 2, 3, 4, 5), to = c(-2, 2))
#'
#' # Specify the "theoretical" range of the input vector
#' data_rescale(c(1, 3, 4), to = c(0, 40), range = c(0, 4))
#'
#' # Dataframes
#' # Reverse-score a variable
#' data_rescale(c(1, 2, 3, 4, 5), to = c(5, 1))
#' data_rescale(c(1, 2, 3, 4, 5), to = c(2, -2))
#'
#' # Data frames
#' head(data_rescale(iris, to = c(0, 1)))
#' head(data_rescale(iris, to = c(0, 1), select = "Sepal.Length"))
#'
Expand All @@ -41,9 +49,7 @@ data_rescale <- function(x, ...) {
#' @rdname data_rescale
#' @export
change_scale <- function(x, ...) {
# TODO: Don't deprecate for now
# so we have time to change it accross the verse, but for next round
# .Deprecated("data_rescale")
# Alias for data_rescale()
data_rescale(x, ...)
}

Expand All @@ -69,7 +75,7 @@ data_rescale.numeric <- function(x,
# Warning if only one value
if (length(unique(x)) == 1 && is.null(range)) {
if (verbose) {
warning(paste0("A `range` must be provided for data with only one observation."))
warning("A `range` must be provided for data with only one unique value.")
}
return(x)
}
Expand Down Expand Up @@ -129,7 +135,7 @@ data_rescale.grouped_df <- function(x,

x <- as.data.frame(x)
for (rows in grps) {
x[rows, ] <- change_scale(
x[rows, ] <- data_rescale(
x[rows, ],
select = select,
exclude = exclude,
Expand Down
197 changes: 197 additions & 0 deletions R/data_reverse.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
#' Reverse-Score Variables
#'
#' Reverse-score variables (change the keying/scoring direction).
#'
#' @inheritParams standardize.data.frame
#'
#' @param x A numeric or factor variable.
#' @param range Initial (old) range of values. If `NULL`, will take the range of
#' the input vector (`range(x)`).
#' @param ... Arguments passed to or from other methods.
#'
#' @examples
#' data_reverse(c(1, 2, 3, 4, 5))
#' data_reverse(c(-2, -1, 0, 2, 1))
#'
#' # Specify the "theoretical" range of the input vector
#' data_reverse(c(1, 3, 4), range = c(0, 4))
#'
#' # Factor variables
#' data_reverse(factor(c(1, 2, 3, 4, 5)))
#' data_reverse(factor(c(1, 2, 3, 4, 5)), range = 0:10)
#'
#' # Data frames
#' head(data_reverse(iris))
#' head(data_reverse(iris, select = "Sepal.Length"))
#'
#' @return A reverse-scored object.
#'
#' @family transform utilities
#'
#' @seealso [data_rescale()] to change the score range for variables (potentially while reversing),
#' [normalize()] [standardize()] [ranktransform()]
#'
#' @export
data_reverse <- function(x, ...) {
UseMethod("data_reverse")
}


#' @rdname data_reverse
#' @export
reverse_scale <- function(x, ...) {
# Alias for data_reverse()
data_reverse(x, ...)
}




#' @rdname data_reverse
#' @export
data_reverse.numeric <- function(x,
range = NULL,
verbose = TRUE,
...) {

# Warning if all NaNs
if (all(is.na(x))) {
return(x)
}

# Warning if only one value
if (length(unique(x)) == 1 && is.null(range)) {
if (verbose) {
warning(paste0("A `range` must be provided for data with only one unique value."))
}
return(x)
}

if (is.null(range)) {
range <- c(min(x, na.rm = TRUE), max(x, na.rm = TRUE))
}

min <- ifelse(is.na(range[1]), min(x, na.rm = TRUE), range[1])
max <- ifelse(is.na(range[2]), max(x, na.rm = TRUE), range[2])
new_min <- max
new_max <- min

out <- as.vector((new_max - new_min) / (max - min) * (x - min) + new_min)
out
}




#' @export
data_reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) {

# Warning if all NaNs
if (all(is.na(x))) {
return(x)
}

# Warning if only one value
if (length(unique(x)) == 1 && is.null(range)) {
if (verbose) {
warning("A `range` must be provided for data with only one unique value.")
}
return(x)
}

if (!is.null(range)) {
old_levels <- range
x <- factor(x, levels = range)
} else {
old_levels <- levels(x)
}

int_x <- as.integer(x)
rev_x <- data_reverse(int_x, range = c(1, length(old_levels)))
x <- factor(rev_x, levels = seq_len(length(old_levels)), labels = old_levels)
x
}




#' @rdname data_reverse
#' @export
data_reverse.grouped_df <- function(x,
range = NULL,
select = NULL,
exclude = NULL,
...) {
info <- attributes(x)

# dplyr >= 0.8.0 returns attribute "indices"
grps <- attr(x, "groups", exact = TRUE)

# check for formula notation, convert to character vector
if (inherits(select, "formula")) {
select <- all.vars(select)
}
if (inherits(exclude, "formula")) {
exclude <- all.vars(exclude)
}

# dplyr < 0.8.0?
if (is.null(grps)) {
grps <- attr(x, "indices", exact = TRUE)
grps <- lapply(grps, function(x) x + 1)
} else {
grps <- grps[[".rows"]]
}

x <- as.data.frame(x)
for (rows in grps) {
x[rows, ] <- data_reverse(
x[rows, ],
select = select,
exclude = exclude,
range = range,
...
)
}
# set back class, so data frame still works with dplyr
attributes(x) <- info
x
}


#' @rdname data_reverse
#' @export
data_reverse.data.frame <- function(x,
range = NULL,
select = NULL,
exclude = NULL,
...) {

# check for formula notation, convert to character vector
if (inherits(select, "formula")) {
select <- all.vars(select)
}
if (inherits(exclude, "formula")) {
exclude <- all.vars(exclude)
}

if (is.null(select)) {
select <- names(x)
}

if (!is.null(exclude)) {
select <- setdiff(select, exclude)
}

# Transform the range so that it is a list now
if (!is.null(range)) {
if (!is.list(range)) {
range <- stats::setNames(rep(list(range), length(select)), select)
}
}

x[select] <- lapply(select, function(n) {
data_reverse(x[[n]], range = range[[n]])
})
x
}
13 changes: 11 additions & 2 deletions man/data_rescale.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

68 changes: 68 additions & 0 deletions man/data_reverse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 9a24180

Please sign in to comment.