Skip to content

Commit

Permalink
Merge pull request ndphillips#230 from hneth/master
Browse files Browse the repository at this point in the history
Tests for editing and using tree.definitions
  • Loading branch information
hneth authored Jul 20, 2024
2 parents cf5873d + 3135083 commit 5e2fc65
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 14 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: FFTrees
Type: Package
Title: Generate, Visualise, and Evaluate Fast-and-Frugal Decision Trees
Version: 2.0.0.9000
Date: 2024-07-15
Date: 2024-07-20
Authors@R: c(person("Nathaniel", "Phillips", role = c("aut"), email = "Nathaniel.D.Phillips.is@gmail.com", comment = c(ORCID = "0000-0002-8969-7013")),
person("Hansjoerg", "Neth", role = c("aut", "cre"), email = "h.neth@uni.kn", comment = c(ORCID = "0000-0001-5427-3141")),
person("Jan", "Woike", role = "aut", comment = c(ORCID = "0000-0002-6816-121X")),
Expand Down
5 changes: 4 additions & 1 deletion tests/testthat/test_09_cost.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
context("Costs work")
context("Verify costs work")


test_that("Using goal = 'cost' kills a high cost cue", {
Expand Down Expand Up @@ -28,6 +28,7 @@ test_that("Using goal = 'cost' kills a high cost cue", {
})



test_that("Changing costs without changing goal does NOT affect FFT creation", {

# Create FFTs with outcome costs 1 for goal 'bacc':
Expand Down Expand Up @@ -56,6 +57,7 @@ test_that("Changing costs without changing goal does NOT affect FFT creation", {
})



test_that("Changing costs and goal = 'cost' DOES affect FFT creation", {

# Create FFTs with outcome costs and goal 'cost':
Expand Down Expand Up @@ -83,4 +85,5 @@ test_that("Changing costs and goal = 'cost' DOES affect FFT creation", {
})



# eof.
128 changes: 128 additions & 0 deletions tests/testthat/test_10_tree_definitions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
context("Get, edit, and use tree.definitions")

# Create new FFTs from edited tree.definitions:

# End-to-end check of the tree.definitions workflow on the iris.v data:
# get definitions from an FFTrees object, read individual FFTs, edit them
# (reorder/flip/drop/edit/add nodes), collect the edited FFTs into a set of
# definitions, and evaluate that set via FFTrees() in two ways.
test_that("Can get, edit, collect, and create FFTs from tree.definitions", {

  # 1. Create an FFTrees object x (for iris data): ------

  x <- FFTrees(
    formula = virginica ~ .,
    data = iris.v,
    main = "Iris virginica",
    decision.labels = c("Not-Vir", "Vir"),
    quiet = TRUE
  )


  # 2. Extract/get tree definitions: ------

  # Get tree definitions of x (as 1 non-tidy df):
  tree_dfs <- get_fft_df(x)

  # tree_dfs  # 6 tree definitions


  # 3. Extract individual tree definitions: ------

  # Get/read specific trees (each tree as 1 tidy df):
  fft_1 <- read_fft_df(ffts_df = tree_dfs, tree = 1)
  fft_3 <- read_fft_df(ffts_df = tree_dfs, tree = 3)


  # 4. Edit individual tree definitions: ------

  # Reorder nodes:
  my_fft_1 <- reorder_nodes(fft = fft_1, order = c(2, 1), quiet = TRUE)     # reverse cues
  my_fft_2 <- reorder_nodes(fft = fft_3, order = c(2, 1, 3), quiet = TRUE)  # no new exit node
  my_fft_3 <- reorder_nodes(fft = fft_3, order = c(1, 3, 2), quiet = TRUE)  # new exit node

  # Flip exits:
  # (my_fft_4 and my_fft_5 are derived from my_fft_1 and my_fft_2 BEFORE
  #  nodes are dropped from those below, so the statement order matters.)
  my_fft_4 <- flip_exits(my_fft_1, nodes = 1, quiet = TRUE)  # flip exits of node 1
  # All 3 nodes are requested, but only the exits of nodes 1 and 2 are flipped
  # (presumably because the final node keeps both exits):
  my_fft_5 <- flip_exits(my_fft_2, nodes = c(1, 2, 3), quiet = TRUE)

  # Drop nodes:
  my_fft_1 <- drop_nodes(my_fft_1, nodes = 2, quiet = TRUE)  # drop exit node
  my_fft_2 <- drop_nodes(my_fft_2, nodes = 2, quiet = TRUE)  # drop non-exit node

  # Edit nodes:
  my_fft_3 <- edit_nodes(
    my_fft_3,  # edit 2 nodes:
    nodes = c(1, 2),
    direction = c("<", "<="),
    threshold = c(4.5, 5.5),
    exit = c(1, 0),
    quiet = TRUE
  )

  # Add nodes:
  my_fft_4 <- add_nodes(  # new 2nd node
    my_fft_4,
    nodes = 2,
    class = "n",
    cue = "sep.len",
    direction = "<=",
    threshold = "5",
    exit = 0,
    quiet = TRUE
  )
  my_fft_5 <- add_nodes(  # new final node
    my_fft_5,
    nodes = 4,
    class = "n",
    cue = "sep.len",
    direction = ">",
    threshold = "5",
    exit = .5,
    quiet = TRUE
  )


  # 5. Convert and add/collect/gather tree definitions: ------

  # Write FFT definition (into 1 non-tidy df):
  my_tree_dfs <- write_fft_df(my_fft_1, tree = 1)

  # Add other trees (using pipes):
  my_tree_dfs <- my_fft_2 |> write_fft_df(tree = 2) |> add_fft_df(my_tree_dfs)
  my_tree_dfs <- my_fft_3 |> write_fft_df(tree = 3) |> add_fft_df(my_tree_dfs)
  my_tree_dfs <- my_fft_4 |> write_fft_df(tree = 4) |> add_fft_df(my_tree_dfs)
  my_tree_dfs <- my_fft_5 |> write_fft_df(tree = 5) |> add_fft_df(my_tree_dfs)

  # my_tree_dfs  # => 5 new tree definitions

  # Add the set of 5 new trees to 6 original ones (re-numbering new ones):
  all_fft_dfs <- add_fft_df(my_tree_dfs, tree_dfs)
  # all_fft_dfs  # => 6 old and 5 new trees = 11 trees


  # 6. Apply new tree.definitions to data: ------

  # a: Evaluate new tree.definitions for an existing FFTrees object x:
  y <- FFTrees(
    object = x,                      # existing FFTrees object x
    tree.definitions = all_fft_dfs,  # set of all FFT definitions
    main = "Iris 2",                 # new label
    quiet = TRUE
  )

  # b: Create a new FFTrees object z (using formula and original data):
  z <- FFTrees(
    formula = virginica ~ .,
    data = iris.v,                   # using original data
    tree.definitions = all_fft_dfs,  # set of all FFT definitions
    main = "Iris 2",                 # new label
    quiet = TRUE
  )


  # 7. Compare results: ------

  # summary(y)
  # summary(z)

  # all.equal(y, z)

  # # Note: Tree #11 is remarkably bad (bacc = 11%).
  # plot(z, tree = 11)


  # 8. Tests: ------

  # expect_s3_class() replaces expect_is(), which is deprecated in the
  # 3rd edition of testthat:
  testthat::expect_s3_class(y, "FFTrees")
  testthat::expect_s3_class(z, "FFTrees")

})


# eof.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
context("Handle NA data")

# NA values in predictors:
# Create FFTs when data has NA values in different types of predictors:

test_that("FFTrees works with NA values in categorical predictors", {

Expand All @@ -12,8 +12,8 @@ test_that("FFTrees works with NA values in categorical predictors", {

# Main: Create an FFTrees object:
fft_NA_1 <- FFTrees(crit ~ .,
data = data_NA_categorical,
quiet = TRUE)
data = data_NA_categorical,
quiet = TRUE)

testthat::expect_is(fft_NA_1, "FFTrees")

Expand All @@ -31,8 +31,8 @@ test_that("FFTrees works with NA values in 2 numeric predictors", {

# Create an FFTrees object:
fft_NA_2 <- FFTrees(crit ~ .,
data = data_NA_numeric,
quiet = TRUE)
data = data_NA_numeric,
quiet = TRUE)

testthat::expect_is(fft_NA_2, "FFTrees")

Expand Down
6 changes: 5 additions & 1 deletion vignettes/FFTrees_examples.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ plot(mushrooms_ring_fft, data = "test")
As we can see, this tree (in `mushrooms_ring_fft`) has both sensitivity and specificity values of around\ $80$%, but does not perform as well as our earlier one (in `mushrooms_fft`).
This suggests that we should discard the expert's advice and primarily rely on the\ `odor` and\ `sporepc` cues.


### Iris.v data

```{r iris-image, fig.align = "center", out.width = "225px", echo = FALSE}
Expand All @@ -170,7 +171,7 @@ In this example, we'll create trees using the entire dataset (without splitting
# Create FFTrees object for iris data:
iris_fft <- FFTrees(formula = virginica ~.,
data = iris.v,
main = "Iris",
main = "Iris viginica",
decision.labels = c("Not-Vir", "Vir"))
```

Expand All @@ -187,6 +188,7 @@ summary(iris_fft) # summarize FFTrees object

However, let's first take a look at the individual training cue accuracies...


#### Visualizing cue accuracies

We can plot the cue accuracies obtained during training by specifying `what = "cues"`:
Expand All @@ -199,6 +201,7 @@ plot(iris_fft, what = "cues")
It looks like the two cues\ `pet.len` and\ `pet.wid` are the best predictors for this dataset.
Based on this insight, we should expect the final trees will likely use one or both of these cues.


#### Visualizing FFT performance

Now let's visualize the best tree:
Expand All @@ -211,6 +214,7 @@ plot(iris_fft)
Indeed, it turns out that the best tree only uses the\ `pet.len` and\ `pet.wid` cues (in that order).
For this data, the fitted tree achieves a sensitivity of\ 100% and a specificity of\ 94%.


#### Viewing alternative FFTs

Now, this tree did quite well, but what if someone wanted a tree with the lowest possible false alarm rate?
Expand Down
13 changes: 7 additions & 6 deletions vignettes/FFTrees_mytree.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -288,9 +288,9 @@ plot(fft_4, n.per.icon = 50, what = "all", show.iconguide = TRUE)
# Overall accuracy is 10% above baseline (predicting False for all cases).
```

<!-- ToDo: Illustrate 4., as case is instructive. -->
<!-- ToDo: Illustrate 4., as this case is instructive. -->

<!-- ToDo: 2nd way to specify an FFT: -->
<!-- NEXT: 2nd way to specify an FFT: -->
<!-- 2. as a data frame using the `tree.definitions` argument -->


Expand Down Expand Up @@ -364,7 +364,7 @@ When looking at **Figure\ 3**, we first move down on the right side (from retrie

We illustrate a typical workflow by redefining some FFTs that were built in the [Tutorial: FFTs for heart disease](FFTrees_heart.html) and evaluating them on the (full) `heartdisease` data.

To obtain a set of existing tree definitions, we use our default algorithms to create an `FFTrees` object\ `x`:
To obtain a set of existing tree definitions, we use our default algorithm to create an `FFTrees` object\ `x`:

```{r fft-treedef-01, message = FALSE}
# Create an FFTrees object x:
Expand Down Expand Up @@ -401,7 +401,7 @@ Alternatively, we can use the `get_fft_df()` utility function on\ `x` to obtain
(tree_dfs <- get_fft_df(x))
```

The resulting R object\ `tree_dfs` is a data frame with `r ncol(tree_dfs)` variables.
The resulting R object\ `tree_dfs` is a data frame with `r ncol(tree_dfs)`\ variables.
Each of its `r nrow(tree_dfs)` rows defines an FFT in the context of our current `FFTrees` object\ `x` (see the vignette on [Creating FFTs with FFTrees()](FFTrees_function.html) for help on interpreting tree definitions).
As the "ifan" algorithm responsible for creating these trees yields a family of highly similar FFTs (which vary only by their exits, and may truncate some cues), we may want to explore alternative versions of these trees.

Expand Down Expand Up @@ -482,6 +482,7 @@ For instance, the tree definition with a signal exit at the first node of `my_ff
(my_fft_4 <- flip_exits(my_fft_1, nodes = c(1, 2)))
```


#### Using **magrittr** pipes to combine steps

The tree conversion and editing functions do not need to be used separately.
Expand Down Expand Up @@ -569,8 +570,8 @@ When using the main `FFTrees()` function with a set of `tree.definitions` (as a
Importantly, however, the input of `tree.definitions` prevents the generation of new FFTs (via the "ifan" or "dfan" algorithms) and instead evaluates the FFT definitions provided on the data specified:^[If the `tree.definitions` contain cue variables or values that cannot be found in the data, this will result in errors.]

```{r use-tree-definitions-01}
# Evaluate tree.definitions for an existing FFTrees object y:
y <- FFTrees(object = x, # an existing FFTrees object
# Evaluate new tree.definitions for an existing FFTrees object x:
y <- FFTrees(object = x, # existing FFTrees object x
tree.definitions = my_tree_dfs, # new set of FFT definitions
main = "Heart Disease 2" # new label
)
Expand Down

0 comments on commit 5e2fc65

Please sign in to comment.