Skip to content

Commit

Permalink
Function and tests for #129
Browse files Browse the repository at this point in the history
  • Loading branch information
mstackhouse committed Dec 15, 2023
1 parent 64a0dc2 commit 0f01ebc
Show file tree
Hide file tree
Showing 4 changed files with 259 additions and 0 deletions.
120 changes: 120 additions & 0 deletions R/collapse_row_labels.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#' Add indentation level based
#'
#' @param .x The number of levels to indent
#' @param .y Input variable for which indentation will be done
#'
#' @return Character string with indentation applied
add_indentation <- function(.x, .y, indent = " ") {
paste(c(rep("",.x-1), .y), collapse=indent)
}


#' Collapse row labels into a single column
#'
#' This is a generalized post processing function that allows you to take groups
#' of by variables and collapse them into a single column. Repeating values are
#' split into separate rows, and for each level of nesting, a specified
#' indentation level can be applied.
#'
#' @param x Input data frame
#' @param ... Row labels to be collapsed
#' @param indent Indentation string to be used, which is multiplied at each indentation level
#' @param target_col The desired name of the output column containing collapsed row labels
#'
#' @return data.frame with row labels collapsed into a single column
#' @export
#'
#' @examples
#' x <- tibble::tribble(
#' ~row_label1, ~row_label2, ~row_label3, ~row_label4, ~var1,
#' "A", "C", "G", "M", 1L,
#' "A", "C", "G", "N", 2L,
#' "A", "C", "H", "O", 3L,
#' "A", "D", "H", "P", 4L,
#' "A", "D", "I", "Q", 5L,
#' "A", "D", "I", "R", 6L,
#' "B", "E", "J", "S", 7L,
#' "B", "E", "J", "T", 8L,
#' "B", "E", "K", "U", 9L,
#' "B", "F", "K", "V", 10L,
#' "B", "F", "L", "W", 11L
#' )
#'
#'
#' collapse_row_labels(x, row_label1, row_label2, row_label3, row_label4)
#'
#' collapse_row_labels(x, row_label1, row_label2, row_label3)
#'
#' collapse_row_labels(x, row_label1, row_label2, indent = " ", target_col = rl)
#'
collapse_row_labels <- function(x, ..., indent = " ", target_col=row_label) {

target_col = enquo(target_col)
dots <- enquos(...)

# browser()
dot_names <- map_chr(dots, as_label)

if (!inherits(x, 'data.frame')) {
stop('x must be a data frame', call.=FALSE)
}

if (!inherits(indent, 'character')) {
stop("indent must be a character string", call.=FALSE)
}

if (!all(map_lgl(dots, quo_is_symbol))) {
stop("Columns provided to dots must be provided as unquoted symbols.", call.=FALSE)
}

if (!all(dot_names %in% names(x))) {
stop("Columns provided to dots are missing from x.", call.=FALSE)
}

if (!quo_is_symbol(target_col)) {
stop("target_col must be provided as an unquoted symbol.", call.=FALSE)
}

if (length(dots) < 2) {
stop("Must have two or more columns to collapse", call.=FALSE)
}

all_but_last <- dots[1:length(dots)-1]

# Add the original row identifier
x['og_row'] <- as.numeric(rownames(x))

# Grab the desired rowlabels, except for the last one specified in the dots
rowlabs <- select(x, !!!all_but_last, og_row)

# Get the distinct list of stubs from the data and grab the nesting level
stubs <- rowlabs %>%
group_by(!!!all_but_last) %>%
slice_head() %>%
pivot_longer(
map_chr(all_but_last, as_label),
names_to = NULL,
values_to = as_label(target_col)
) %>%
group_by(og_row) %>%
mutate(
stub_sort = row_number()
)

# Join back to the original data
x %>%
bind_rows(stubs, .id="id") %>%
# Put everything into the right spot
arrange(og_row, desc(id)) %>%
fill(stub_sort) %>%
mutate(
# Figure out the indentation level
stub_sort = if_else(id == 1, stub_sort + 1, stub_sort),
# Build and indent the new row label column
!!target_col := if_else(is.na(!!target_col), !!!tail(dots, 1), !!target_col),
!!target_col := map2_chr(stub_sort, !!target_col, add_indentation, indent = indent),
# Fill in the empty character fields
across(where(is.character), ~ replace_na(., ''))
) %>%
select(!!target_col, !c(id, og_row, stub_sort, !!!dots))
}
19 changes: 19 additions & 0 deletions man/add_indentation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 50 additions & 0 deletions man/collapse_row_labels.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

70 changes: 70 additions & 0 deletions tests/testthat/test-collapse_row_labels.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
dat <- tibble::tribble(
~row_label1, ~row_label2, ~row_label3, ~row_label4, ~var1,
"A", "C", "G", "M", 1L,
"A", "C", "G", "N", 2L,
"A", "C", "H", "O", 3L,
"A", "D", "H", "P", 4L,
"A", "D", "I", "Q", 5L,
"A", "D", "I", "R", 6L,
"B", "E", "J", "S", 7L,
"B", "E", "J", "T", 8L,
"B", "E", "K", "U", 9L,
"B", "F", "K", "V", 10L,
"B", "F", "L", "W", 11L
)


test_that("Errors generate as expected", {
expect_error(collapse_row_labels(1, blah, blah), "x must be a data frame")
expect_error(
collapse_row_labels(dat, row_label1, row_label2, indent = 1),
"indent must be a character string"
)
expect_error(
collapse_row_labels(dat, row_label1, missing_col),
"Columns provided to dots are missing from x."
)
expect_error(
collapse_row_labels(dat, row_label1, "row_label2"),
"Columns provided to dots must be provided as unquoted symbols."
)

expect_error(
collapse_row_labels(dat, row_label1, row_label2, target_col = "RL"),
"target_col must be provided as an unquoted symbol."
)
expect_error(
collapse_row_labels(dat, row_label1),
"Must have two or more columns to collapse"
)
})

test_that("Row labels collapse appropriately", {

x <- collapse_row_labels(dat, row_label1, row_label2, row_label3, row_label4)

expect_equal(
x$row_label[1:6],
c("A", " C", " G", " M", " N", "A")
)

x <- collapse_row_labels(dat, row_label1, row_label2, row_label3, row_label4, indent = " ")
expect_equal(
x$row_label[1:6],
c("A", " C", " G", " M", " N", "A")
)

x <- collapse_row_labels(dat, row_label1, row_label2, row_label3)
expect_equal(names(x), c("row_label", "row_label4", "var1"))
expect_equal(
x$row_label[1:6],
c("A", " C", " G", " G", " H", "A")
)
expect_equal(
x$row_label4[1:6],
c("", "", "M", "N", "O", "")
)

x <- collapse_row_labels(dat, row_label1, row_label2, row_label3, target_col = rl)
expect_equal(names(x), c("rl", "row_label4", "var1"))
})

0 comments on commit 0f01ebc

Please sign in to comment.