Skip to content

Commit

Permalink
add "name" column to sankey nodes conversion (#8)
Browse files Browse the repository at this point in the history
  • Loading branch information
cjyetman authored Dec 16, 2023
1 parent 24347f8 commit 8cb09fb
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 9 deletions.
2 changes: 1 addition & 1 deletion R/as_sankey_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ as_sankey_data.data.frame <-
.data <- as_sankey_data_links(.data)

# build nodes data frame
nodes <- data.frame(id = unique(c(.data$source, .data$target)), group = 1L)
nodes <- as_sankey_data_nodes(data.frame(id = unique(c(.data$source, .data$target))))

return(list(nodes = nodes, links = .data))
}
Expand Down
22 changes: 14 additions & 8 deletions R/as_sankey_data_nodes.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,18 @@ as_sankey_data_nodes.data.frame <-
# set the name of the node id variable to "id"
names(.data)[id_idx] <- "id"

# if `id` is a character vector, set that to the names and make a numeric `id`
if (is.character(.data$id)) {
if (!"name" %in% names(.data)) {
.data$name <- .data$id
}
.data$id <- seq_along(.data$id) - 1
# find a "label" column, otherwise assume it's the first column
label_names <- c("names",
"labels",
"name",
"label")

label_idx <- index_of_first_found_in(tolower(names(.data)), domain = label_names)

if (is.na(label_idx)) {
.data$name <- .data$id
} else {
names(.data)[label_idx] <- "name"
}

# find a "group" column, otherwise make one
Expand All @@ -58,8 +64,8 @@ as_sankey_data_nodes.data.frame <-
names(.data)[group_idx] <- "group"
}

resort_idxs <- c(id_idx, group_idx, setdiff(seq_along(.data), c(id_idx, group_idx)))
.data <- .data[resort_idxs]
xtra_cols <- names(.data)[!names(.data) %in% c("id", "name", "group")]
.data <- .data[c("id", "name", "group", xtra_cols)]

# convert "id" and "group" columns to character
.data$id <- as.character(.data$id)
Expand Down
117 changes: 117 additions & 0 deletions tests/testthat/test-as_sankey_data_nodes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
test_default_characteristics <-
function(.data) {
expect_s3_class(.data, "data.frame")
expect_s3_class(.data, "tbl")

expect_gte(ncol(.data), 3L)
expect_identical(c("id", "name", "group"), names(.data)[1:3])

expect_true(all(vapply(.data, is.atomic, logical(1))))

expect_type(.data[[1L]], "character")
expect_type(.data[[2L]], "character")
expect_type(.data[[3L]], "character")
}


test_that("as_sankey_data_nodes() handles a data frame with only an id column", {
example <- data.frame(id = c("a", "b"))
result <- as_sankey_data_nodes(example)
test_default_characteristics(result)
expect_identical(result[[1L]], c("a", "b"))
expect_identical(result[[2L]], c("a", "b"))
expect_identical(result[[3L]], c("1", "1"))
})


test_that("as_sankey_data_nodes() handles a data frame with only an name column", {
example <- data.frame(name = c("a", "b"))
result <- as_sankey_data_nodes(example)
test_default_characteristics(result)
expect_identical(result[[1L]], c("a", "b"))
expect_identical(result[[2L]], c("a", "b"))
expect_identical(result[[3L]], c("1", "1"))
})


test_that("as_sankey_data_nodes() handles a data frame with a name and 1 extra column", {
example <- data.frame(name = c("a", "b"), other = 1L)
result <- as_sankey_data_nodes(example)
test_default_characteristics(result)
expect_identical(result[[1L]], c("a", "b"))
expect_identical(result[[2L]], c("a", "b"))
expect_identical(result[[3L]], c("1", "1"))
expect_identical(result[[4L]], c(1L, 1L))
})


test_that("as_sankey_data_nodes() handles a data frame with a name and group column", {
example <- data.frame(name = c("a", "b"), group = c(1L, 2L))
result <- as_sankey_data_nodes(example)
test_default_characteristics(result)
expect_identical(result[[1L]], c("a", "b"))
expect_identical(result[[2L]], c("a", "b"))
expect_identical(result[[3L]], c("1", "2"))
})


test_that("as_sankey_data_nodes() handles a data frame with no properly named id column", {
example <- data.frame(x = c("a", "b"), group = 1L)
result <- as_sankey_data_nodes(example)
test_default_characteristics(result)
expect_identical(result[[1L]], c("a", "b"))
expect_identical(result[[2L]], c("a", "b"))
expect_identical(result[[3L]], c("1", "1"))
})


test_that("as_sankey_data_nodes() handles a data frame with group column that is named by na argument", {
example <- data.frame(id = c("a", "b"), x = c(1, 2))
result <- as_sankey_data_nodes(example, group = "x")
test_default_characteristics(result)
expect_identical(result[[1L]], c("a", "b"))
expect_identical(result[[2L]], c("a", "b"))
expect_identical(result[[3L]], c("1", "2"))
})


test_that("as_sankey_data_nodes() handles a list with only an id column", {
example <- list(list(id = "a"), list(id = "b"))
result <- as_sankey_data_nodes(example)
test_default_characteristics(result)
expect_identical(result[[1L]], c("a", "b"))
expect_identical(result[[2L]], c("a", "b"))
expect_identical(result[[3L]], c("1", "1"))
})


test_that("as_sankey_data_nodes() handles a list with an id and group column", {
example <- list(list(id = "a", group = 1L), list(id = "b", group = 1L))
result <- as_sankey_data_nodes(example)
test_default_characteristics(result)
expect_identical(result[[1L]], c("a", "b"))
expect_identical(result[[2L]], c("a", "b"))
expect_identical(result[[3L]], c("1", "1"))
})


test_that("as_sankey_data_nodes() handles a list with an id and an extra column", {
example <- list(list(id = "a", other = 1L), list(id = "b", other = 1L))
result <- as_sankey_data_nodes(example)
test_default_characteristics(result)
expect_identical(result[[1L]], c("a", "b"))
expect_identical(result[[2L]], c("a", "b"))
expect_identical(result[[3L]], c("1", "1"))
expect_identical(result[[4L]], c(1L, 1L))
})


test_that("as_sankey_data_nodes() handles a list with mixed columns", {
example <- list(list(id = "a", other = 1L), list(id = "b", group = 1L))
result <- as_sankey_data_nodes(example)
test_default_characteristics(result)
expect_identical(result[[1L]], c("a", "b"))
expect_identical(result[[2L]], c("a", "b"))
expect_identical(result[[3L]], c("NA", "1"))
expect_identical(result[[4L]], c(1L, NA_integer_))
})

0 comments on commit 8cb09fb

Please sign in to comment.