From dbf43be870bf3faff6c41ed669ec5fc22cfecb19 Mon Sep 17 00:00:00 2001 From: bcjaeger Date: Sat, 4 May 2024 09:47:33 -0400 Subject: [PATCH 1/6] v 0.1.4 submitted to CRAN --- CRAN-SUBMISSION | 6 +++--- cran-comments.md | 6 +----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index dbc50b70..e096d9c0 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ -Version: 0.1.3 -Date: 2024-01-19 22:06:35 UTC -SHA: a13cec24750e0efdda2624199e8c4e48506e287b +Version: 0.1.4 +Date: 2024-05-03 19:26:08 UTC +SHA: ef7c70ebd2dc070c696af0fb4d24c066bbcef487 diff --git a/cran-comments.md b/cran-comments.md index 50c2d321..66bf139d 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,6 +1,2 @@ -With the help of a colleague, I was able to reproduce the undefined behavior error. I am confident that it is now resolved. - -I understand you are taking an extended break from reviewing this package and I'm comfortable waiting until the break is concluded. Thank you for everything you do. - -I saw that it took a long time to run tests on my last submission. I added rules to skip tests on CRAN that require longer run times. I hope this is helpful. +This patch release contains a few minor bug fixes. From 64db3504efc29799193ed52fdb85bc0c687748ef Mon Sep 17 00:00:00 2001 From: bcjaeger Date: Sat, 4 May 2024 09:48:04 -0400 Subject: [PATCH 2/6] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index bb8fc8f8..f251268b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: aorsf Title: Accelerated Oblique Random Forests -Version: 0.1.4 +Version: 0.1.4.9001 Authors@R: c( person("Byron", "Jaeger", , "bjaeger@wakehealth.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7399-2299")), From af2e88101c39ab1ba79467b686cdb4d301689357 Mon Sep 17 00:00:00 2001 From: bcjaeger Date: Sat, 4 May 2024 09:50:03 -0400 Subject: [PATCH 3/6] drop=FALSE to prevent coercion --- R/orsf_R6.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/orsf_R6.R b/R/orsf_R6.R index 29979b46..9e789598 100644 --- a/R/orsf_R6.R +++ b/R/orsf_R6.R @@ -3153,7 +3153,7 @@ ObliqueForest <- R6::R6Class( private$y <- select_cols(self$data, private$data_names$y) if(self$na_action == 'omit' && !placeholder) - private$y <- private$y[private$data_rows_complete, ] + private$y <- private$y[private$data_rows_complete, , drop = FALSE] private$prep_y_internal(placeholder) From c557da2ab580225dd8370f44fbd1e236ea090ef5 Mon Sep 17 00:00:00 2001 From: bcjaeger Date: Sat, 4 May 2024 09:50:14 -0400 Subject: [PATCH 4/6] update news --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index 5b38cc32..0e16427e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# aorsf 0.1.5 (unreleased) + +* fixed an issue where omitting NA values would cause an error in regression forests. + # aorsf 0.1.4 * `orsf_vs` now returns a column that contains non-reference coded variable names (see https://github.com/ropensci/aorsf/pull/52). From 04e2e544886781df737ec3ff205c38d35016b00b Mon Sep 17 00:00:00 2001 From: bcjaeger Date: Sat, 4 May 2024 11:18:03 -0400 Subject: [PATCH 5/6] ensure oobag compatible with na_action --- tests/testthat/test-na_action.R | 113 ++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 tests/testthat/test-na_action.R diff --git a/tests/testthat/test-na_action.R b/tests/testthat/test-na_action.R new file mode 100644 index 00000000..3049fb2e --- /dev/null +++ b/tests/testthat/test-na_action.R @@ -0,0 +1,113 @@ + +test_that( + desc = "na action of omit works with oobag preds", + code = { + + mtcars_na <- mtcars + + mtcars_na$vs <- factor(mtcars_na$vs) + + mtcars_na$disp[1] <- NA + mtcars_na$hp[3] <- NA + + regr_fit <- orsf( + data = mtcars_na, formula = mpg ~ ., + n_tree = n_tree_test, + tree_seeds = seeds_standard, + na_action = 'omit' + ) + + clsf_fit <- orsf( + data = mtcars_na, formula = vs ~ ., + n_tree = n_tree_test, + tree_seeds = seeds_standard, + na_action = 'omit' + ) + + surv_fit <- orsf( + mtcars_na, mpg + vs ~ ., + n_tree = n_tree_test, + tree_seeds = seeds_standard, + na_action = 'omit' + ) + + expect_equal(nrow(na.omit(mtcars_na)), regr_fit$n_obs) + expect_equal(nrow(na.omit(mtcars_na)), clsf_fit$n_obs) + expect_equal(nrow(na.omit(mtcars_na)), surv_fit$n_obs) + + clsf_prd_oob <- predict(clsf_fit, oobag = TRUE) + regr_prd_oob <- predict(regr_fit, oobag = TRUE) + surv_prd_oob <- predict(surv_fit, oobag = TRUE) + + na_rows <- which(is.na(clsf_fit$pred_oobag)) + expect_true(all(is.na(clsf_prd_oob[na_rows, drop = FALSE]))) + + na_rows <- which(is.na(regr_fit$pred_oobag)) + expect_true(all(is.na(regr_prd_oob[na_rows, drop = FALSE]))) + + na_rows <- which(is.na(surv_fit$pred_oobag)) + expect_true(all(is.na(surv_prd_oob[na_rows, drop = FALSE]))) + + }) + + + +test_that( + desc = "na action of impute works with oobag preds", + code = { + + mtcars_na <- mtcars + + mtcars_na$vs <- factor(mtcars_na$vs) + + mtcars_na$disp[1] <- NA + mtcars_na$hp[3] <- NA + + regr_fit <- orsf( + data = mtcars_na, formula = mpg ~ ., + n_tree = n_tree_test, + tree_seeds = seeds_standard, + na_action = 'impute_meanmode' + ) + + clsf_fit <- orsf( + data = mtcars_na, formula = vs ~ ., + n_tree = n_tree_test, + tree_seeds = seeds_standard, + na_action = 'impute_meanmode' + ) + + surv_fit <- orsf( + mtcars_na, mpg + vs ~ ., + n_tree = n_tree_test, + tree_seeds = seeds_standard, + na_action = 'impute_meanmode' + ) + + expect_equal(nrow(mtcars_na), regr_fit$n_obs) + expect_equal(nrow(mtcars_na), clsf_fit$n_obs) + expect_equal(nrow(mtcars_na), surv_fit$n_obs) + + clsf_prd_oob <- predict(clsf_fit, oobag = TRUE) + regr_prd_oob <- predict(regr_fit, oobag = TRUE) + surv_prd_oob <- predict(surv_fit, oobag = TRUE) + + expect_equal(nrow(mtcars_na), regr_fit$n_obs) + expect_equal(nrow(mtcars_na), clsf_fit$n_obs) + expect_equal(nrow(mtcars_na), surv_fit$n_obs) + + clsf_prd_oob <- predict(clsf_fit, oobag = TRUE) + regr_prd_oob <- predict(regr_fit, oobag = TRUE) + surv_prd_oob <- predict(surv_fit, oobag = TRUE) + + na_rows <- which(is.na(clsf_fit$pred_oobag)) + expect_true(all(is.na(clsf_prd_oob[na_rows, drop = FALSE]))) + + na_rows <- which(is.na(regr_fit$pred_oobag)) + expect_true(all(is.na(regr_prd_oob[na_rows, drop = FALSE]))) + + na_rows <- which(is.na(surv_fit$pred_oobag)) + expect_true(all(is.na(surv_prd_oob[na_rows, drop = FALSE]))) + + }) + From 8c1fe252f60eba48ae9d914fb8977096d206475f Mon Sep 17 00:00:00 2001 From: bcjaeger Date: Sat, 4 May 2024 11:18:48 -0400 Subject: [PATCH 6/6] postpone dimension test to allow na_action of 'omit' --- R/orsf_R6.R | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/R/orsf_R6.R b/R/orsf_R6.R index 9e789598..e7352e68 100644 --- a/R/orsf_R6.R +++ b/R/orsf_R6.R @@ -441,10 +441,6 @@ ObliqueForest <- R6::R6Class( # object and then use this function. We need checks for that case. new_data <- new_data %||% self$data - if(oobag && nrow(new_data) != self$n_obs){ - stop("input data must have ", self$n_obs, " observations to ", - "compute out-of-bag predictions.", call. = FALSE) - } # run checks before you assign new values to object. # otherwise, if a check throws an error, the object will @@ -476,6 +472,12 @@ ObliqueForest <- R6::R6Class( private$init_data_rows_complete() private$prep_x() + + if(oobag && nrow(private$x) != self$n_obs){ + stop("input data must have ", self$n_obs, " observations to ", + "compute out-of-bag predictions.", call. = FALSE) + } + # y and w do not need to be prepped for prediction, # but they need to match orsf_cpp()'s expectations private$y <- matrix(0, nrow = 1, ncol = 1) @@ -3877,7 +3879,6 @@ ObliqueForestSurvival <- R6::R6Class( clean_pred_oobag_internal = function(){ - # put the oob predictions into the same order as the training data. unsorted <- collapse::radixorder(private$data_row_sort) self$pred_oobag <- self$pred_oobag[unsorted, , drop = FALSE] @@ -3939,7 +3940,10 @@ ObliqueForestSurvival <- R6::R6Class( # must sort if oobag b/c when oobag_rows were originally created, # it was after the data had been sorted. - if(oobag) private$sort_inputs(sort_y = FALSE) + if(oobag){ + private$sort_inputs(sort_y = FALSE) + unsorted <- collapse::radixorder(private$data_row_sort) + } cpp_args = private$prep_cpp_args(x = private$x, y = private$y, @@ -3963,6 +3967,12 @@ ObliqueForestSurvival <- R6::R6Class( results[[i]] <- do.call(orsf_cpp, args = cpp_args)$pred_new + if(oobag){ + # put the oob predictions into the same order as the training data. + results[[i]] <- results[[i]][unsorted, , drop = FALSE] + } + + results[[i]] <- private$clean_pred_new(results[[i]]) } @@ -3983,9 +3993,7 @@ ObliqueForestSurvival <- R6::R6Class( if(oobag){ # put the oob predictions into the same order as the training data. - unsorted <- collapse::radixorder(private$data_row_sort) out_values <- out_values[unsorted, , drop = FALSE] - } private$clean_pred_new(out_values)