diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index dbc50b70..e096d9c0 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ -Version: 0.1.3 -Date: 2024-01-19 22:06:35 UTC -SHA: a13cec24750e0efdda2624199e8c4e48506e287b +Version: 0.1.4 +Date: 2024-05-03 19:26:08 UTC +SHA: ef7c70ebd2dc070c696af0fb4d24c066bbcef487 diff --git a/DESCRIPTION b/DESCRIPTION index bb8fc8f8..f251268b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: aorsf Title: Accelerated Oblique Random Forests -Version: 0.1.4 +Version: 0.1.4.9001 Authors@R: c( person("Byron", "Jaeger", , "bjaeger@wakehealth.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7399-2299")), diff --git a/NEWS.md b/NEWS.md index 5b38cc32..0e16427e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# aorsf 0.1.5 (unreleased) + +* fixed an issue where omitting NA values would cause an error in regression forests. + # aorsf 0.1.4 * `orsf_vs` now returns a column that contains non-reference coded variable names (see https://github.com/ropensci/aorsf/pull/52). diff --git a/R/orsf_R6.R b/R/orsf_R6.R index 29979b46..e7352e68 100644 --- a/R/orsf_R6.R +++ b/R/orsf_R6.R @@ -441,10 +441,6 @@ ObliqueForest <- R6::R6Class( # object and then use this function. We need checks for that case. new_data <- new_data %||% self$data - if(oobag && nrow(new_data) != self$n_obs){ - stop("input data must have ", self$n_obs, " observations to ", - "compute out-of-bag predictions.", call. = FALSE) - } # run checks before you assign new values to object. # otherwise, if a check throws an error, the object will @@ -476,6 +472,12 @@ ObliqueForest <- R6::R6Class( private$init_data_rows_complete() private$prep_x() + + if(oobag && nrow(private$x) != self$n_obs){ + stop("input data must have ", self$n_obs, " observations to ", + "compute out-of-bag predictions.", call. = FALSE) + } + # y and w do not need to be prepped for prediction, # but they need to match orsf_cpp()'s expectations private$y <- matrix(0, nrow = 1, ncol = 1) @@ -3153,7 +3155,7 @@ ObliqueForest <- R6::R6Class( private$y <- select_cols(self$data, private$data_names$y) if(self$na_action == 'omit' && !placeholder) - private$y <- private$y[private$data_rows_complete, ] + private$y <- private$y[private$data_rows_complete, , drop = FALSE] private$prep_y_internal(placeholder) @@ -3877,7 +3879,6 @@ ObliqueForestSurvival <- R6::R6Class( clean_pred_oobag_internal = function(){ - # put the oob predictions into the same order as the training data. unsorted <- collapse::radixorder(private$data_row_sort) self$pred_oobag <- self$pred_oobag[unsorted, , drop = FALSE] @@ -3939,7 +3940,10 @@ ObliqueForestSurvival <- R6::R6Class( # must sort if oobag b/c when oobag_rows were originally created, # it was after the data had been sorted. - if(oobag) private$sort_inputs(sort_y = FALSE) + if(oobag){ + private$sort_inputs(sort_y = FALSE) + unsorted <- collapse::radixorder(private$data_row_sort) + } cpp_args = private$prep_cpp_args(x = private$x, y = private$y, @@ -3963,6 +3967,12 @@ ObliqueForestSurvival <- R6::R6Class( results[[i]] <- do.call(orsf_cpp, args = cpp_args)$pred_new + if(oobag){ + # put the oob predictions into the same order as the training data. + results[[i]] <- results[[i]][unsorted, , drop = FALSE] + } + + results[[i]] <- private$clean_pred_new(results[[i]]) } @@ -3983,9 +3993,7 @@ ObliqueForestSurvival <- R6::R6Class( if(oobag){ # put the oob predictions into the same order as the training data. - unsorted <- collapse::radixorder(private$data_row_sort) out_values <- out_values[unsorted, , drop = FALSE] - } private$clean_pred_new(out_values) diff --git a/cran-comments.md b/cran-comments.md index 50c2d321..66bf139d 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,6 +1,2 @@ -With the help of a colleague, I was able to reproduce the undefined behavior error. I am confident that it is now resolved. - -I understand you are taking an extended break from reviewing this package and I'm comfortable waiting until the break is concluded. Thank you for everything you do. - -I saw that it took a long time to run tests on my last submission. I added rules to skip tests on CRAN that require longer run times. I hope this is helpful. +This patch release contains a few minor bug fixes. diff --git a/tests/testthat/test-na_action.R b/tests/testthat/test-na_action.R new file mode 100644 index 00000000..3049fb2e --- /dev/null +++ b/tests/testthat/test-na_action.R @@ -0,0 +1,113 @@ + +test_that( + desc = "na action of omit works with oobag preds", + code = { + + mtcars_na <- mtcars + + mtcars_na$vs <- factor(mtcars_na$vs) + + mtcars_na$disp[1] <- NA + mtcars_na$hp[3] <- NA + + regr_fit <- orsf( + data = mtcars_na, formula = mpg ~ ., + n_tree = n_tree_test, + tree_seeds = seeds_standard, + na_action = 'omit' + ) + + clsf_fit <- orsf( + data = mtcars_na, formula = vs ~ ., + n_tree = n_tree_test, + tree_seeds = seeds_standard, + na_action = 'omit' + ) + + surv_fit <- orsf( + mtcars_na, mpg + vs ~ ., + n_tree = n_tree_test, + tree_seeds = seeds_standard, + na_action = 'omit' + ) + + expect_equal(nrow(na.omit(mtcars_na)), regr_fit$n_obs) + expect_equal(nrow(na.omit(mtcars_na)), clsf_fit$n_obs) + expect_equal(nrow(na.omit(mtcars_na)), surv_fit$n_obs) + + clsf_prd_oob <- predict(clsf_fit, oobag = TRUE) + regr_prd_oob <- predict(regr_fit, oobag = TRUE) + surv_prd_oob <- predict(surv_fit, oobag = TRUE) + + na_rows <- which(is.na(clsf_fit$pred_oobag)) + expect_true(all(is.na(clsf_prd_oob[na_rows, drop = FALSE]))) + + na_rows <- which(is.na(regr_fit$pred_oobag)) + expect_true(all(is.na(regr_prd_oob[na_rows, drop = FALSE]))) + + na_rows <- which(is.na(surv_fit$pred_oobag)) + expect_true(all(is.na(surv_prd_oob[na_rows, drop = FALSE]))) + + }) + + + +test_that( + desc = "na action of impute works with oobag preds", + code = { + + mtcars_na <- mtcars + + mtcars_na$vs <- factor(mtcars_na$vs) + + mtcars_na$disp[1] <- NA + mtcars_na$hp[3] <- NA + + regr_fit <- orsf( + data = mtcars_na, formula = mpg ~ ., + n_tree = n_tree_test, + tree_seeds = seeds_standard, + na_action = 'impute_meanmode' + ) + + clsf_fit <- orsf( + data = mtcars_na, formula = vs ~ ., + n_tree = n_tree_test, + tree_seeds = seeds_standard, + na_action = 'impute_meanmode' + ) + + surv_fit <- orsf( + mtcars_na, mpg + vs ~ ., + n_tree = n_tree_test, + tree_seeds = seeds_standard, + na_action = 'impute_meanmode' + ) + + expect_equal(nrow(mtcars_na), regr_fit$n_obs) + expect_equal(nrow(mtcars_na), clsf_fit$n_obs) + expect_equal(nrow(mtcars_na), surv_fit$n_obs) + + clsf_prd_oob <- predict(clsf_fit, oobag = TRUE) + regr_prd_oob <- predict(regr_fit, oobag = TRUE) + surv_prd_oob <- predict(surv_fit, oobag = TRUE) + + expect_equal(nrow(mtcars_na), regr_fit$n_obs) + expect_equal(nrow(mtcars_na), clsf_fit$n_obs) + expect_equal(nrow(mtcars_na), surv_fit$n_obs) + + clsf_prd_oob <- predict(clsf_fit, oobag = TRUE) + regr_prd_oob <- predict(regr_fit, oobag = TRUE) + surv_prd_oob <- predict(surv_fit, oobag = TRUE) + + na_rows <- which(is.na(clsf_fit$pred_oobag)) + expect_true(all(is.na(clsf_prd_oob[na_rows, drop = FALSE]))) + + na_rows <- which(is.na(regr_fit$pred_oobag)) + expect_true(all(is.na(regr_prd_oob[na_rows, drop = FALSE]))) + + na_rows <- which(is.na(surv_fit$pred_oobag)) + expect_true(all(is.na(surv_prd_oob[na_rows, drop = FALSE]))) + + }) +