From 233e4fa11b7231f115aff16e2806efe6e0851c94 Mon Sep 17 00:00:00 2001 From: byron jaeger Date: Sun, 12 Nov 2023 22:24:25 -0500 Subject: [PATCH] fixup for cran check --- R/orsf_control.R | 20 -------- Rmd/orsf_examples.Rmd | 24 ++++++--- man/orsf.Rd | 26 +++++++--- man/orsf_control_cph.Rd | 7 --- man/orsf_control_fast.Rd | 7 --- man/orsf_control_net.Rd | 11 ---- tests/testthat/test-orsf.R | 102 ++++++++++++++++++------------------- 7 files changed, 86 insertions(+), 111 deletions(-) diff --git a/R/orsf_control.R b/R/orsf_control.R index 95f3878e..0875371c 100644 --- a/R/orsf_control.R +++ b/R/orsf_control.R @@ -36,11 +36,6 @@ #' on the scale of your data, which is why the default value is `TRUE`. #' #' -#' @examples -#' -#' orsf(data = pbc_orsf, -#' formula = Surv(time, status) ~ . - id, -#' control = orsf_control_fast()) #' orsf_control_fast <- function(method = 'efron', do_scale = TRUE, @@ -115,11 +110,6 @@ orsf_control_fast <- function(method = 'efron', #' Data: Extending the Cox Model. Statistics for Biology and Health. #' Springer, New York, NY. DOI: 10.1007/978-1-4757-3294-8_3 #' -#' @examples -#' -#' orsf(data = pbc_orsf, -#' formula = Surv(time, status) ~ . - id, -#' control = orsf_control_cph()) #' orsf_control_cph <- function(method = 'efron', eps = 1e-9, @@ -186,16 +176,6 @@ orsf_control_cph <- function(method = 'efron', #' #' `r roxy_cite_simon_2011()` #' -#' @examples -#' -#' # orsf_control_net() is considerably slower than orsf_control_cph(), -#' # The example uses n_tree = 25 so that my examples run faster, -#' # but you should use at least 500 trees in applied settings. -#' -#' orsf(data = pbc_orsf, -#' formula = Surv(time, status) ~ . - id, -#' n_tree = 25, -#' control = orsf_control_net()) orsf_control_net <- function(alpha = 1/2, df_target = NULL, diff --git a/Rmd/orsf_examples.Rmd b/Rmd/orsf_examples.Rmd index cd97e7c2..36921682 100644 --- a/Rmd/orsf_examples.Rmd +++ b/Rmd/orsf_examples.Rmd @@ -50,7 +50,7 @@ The accelerated ORSF ensemble is the default because it has a nice balance of co ```{r} fit_accel <- orsf(pbc_orsf, - control = orsf_control_fast(), + control = orsf_control_survival(), formula = Surv(time, status) ~ . - id, tree_seeds = 329) @@ -62,8 +62,12 @@ fit_accel <- orsf(pbc_orsf, ```{r} +control_cph <- orsf_control_survival(method = 'glm', + scale_x = TRUE, + max_iter = 20) + fit_cph <- orsf(pbc_orsf, - control = orsf_control_cph(), + control = control_cph, formula = Surv(time, status) ~ . - id, tree_seeds = 329) @@ -77,8 +81,11 @@ fit_cph <- orsf(pbc_orsf, # select 3 predictors out of 5 to be used in # each linear combination of predictors. + +control_net <- orsf_control_survival(method = 'net', target_df = 3) + fit_net <- orsf(pbc_orsf, - control = orsf_control_net(df_target = 3), + control = control_net, formula = Surv(time, status) ~ . - id, tree_seeds = 329) @@ -157,16 +164,16 @@ We can plug these functions into `orsf_control_custom()`, and then pass the resu fit_rando <- orsf(pbc_orsf, Surv(time, status) ~ . - id, - control = orsf_control_custom(beta_fun = f_rando), + control = orsf_control_survival(method = f_rando), tree_seeds = 329) fit_pca <- orsf(pbc_orsf, Surv(time, status) ~ . - id, - control = orsf_control_custom(beta_fun = f_pca), + control = orsf_control_survival(method = f_pca), tree_seeds = 329) fit_rlt <- orsf(pbc_orsf, time + status ~ . 
- id, - control = orsf_control_custom(beta_fun = f_aorsf), + control = orsf_control_survival(method = f_aorsf), tree_seeds = 329) ``` @@ -221,6 +228,7 @@ Start with a recipe to pre-process data ```{r} imputer <- recipe(pbc_orsf, formula = time + status ~ .) %>% + step_rm(id) %>% step_impute_mean(all_numeric_predictors()) %>% step_impute_mode(all_nominal_predictors()) @@ -268,7 +276,9 @@ aorsf_wf <- function(train, test, pred_horizon){ train %>% orsf(Surv(time, status) ~ .,) %>% - predict(new_data = test, pred_horizon = pred_horizon) %>% + predict(new_data = test, + pred_type = 'risk', + pred_horizon = pred_horizon) %>% as.numeric() } diff --git a/man/orsf.Rd b/man/orsf.Rd index f6a74d23..3f3a43ec 100644 --- a/man/orsf.Rd +++ b/man/orsf.Rd @@ -389,7 +389,7 @@ iteration of Newton Raphson scoring on the Cox partial likelihood function to find linear combinations of predictors. \if{html}{\out{
}}\preformatted{fit_accel <- orsf(pbc_orsf,
-                  control = orsf_control_fast(),
+                  control = orsf_control_survival(),
                   formula = Surv(time, status) ~ . - id,
                   tree_seeds = 329)
 }\if{html}{\out{</div>}}
@@ -401,8 +401,12 @@ function to find linear combinations of predictors.
 survival tree, using the regression coefficients to create linear
 combinations of predictors:
 
-\if{html}{\out{<div class="sourceCode r">}}\preformatted{fit_cph <- orsf(pbc_orsf,
-                control = orsf_control_cph(),
+\if{html}{\out{<div class="sourceCode r">}}\preformatted{control_cph <- orsf_control_survival(method = 'glm',
+                                     scale_x = TRUE,
+                                     max_iter = 20)
+
+fit_cph <- orsf(pbc_orsf,
+                control = control_cph,
                 formula = Surv(time, status) ~ . - id,
                 tree_seeds = 329)
 }\if{html}{\out{</div>}}
@@ -418,8 +422,11 @@ than the other options.
 
 \if{html}{\out{<div class="sourceCode r">}}\preformatted{# select 3 predictors out of 5 to be used in
 # each linear combination of predictors.
+
+control_net <- orsf_control_survival(method = 'net', target_df = 3)
+
 fit_net <- orsf(pbc_orsf,
-                control = orsf_control_net(df_target = 3),
+                control = control_net,
                 formula = Surv(time, status) ~ . - id,
                 tree_seeds = 329)
 }\if{html}{\out{</div>}}
@@ -489,16 +496,16 @@ the result into \code{orsf()}:
 
 \if{html}{\out{<div class="sourceCode r">}}\preformatted{fit_rando <- orsf(pbc_orsf,
                   Surv(time, status) ~ . - id,
-                  control = orsf_control_custom(beta_fun = f_rando),
+                  control = orsf_control_survival(method = f_rando),
                   tree_seeds = 329)
 
 fit_pca <- orsf(pbc_orsf,
                 Surv(time, status) ~ . - id,
-                control = orsf_control_custom(beta_fun = f_pca),
+                control = orsf_control_survival(method = f_pca),
                 tree_seeds = 329)
 
 fit_rlt <- orsf(pbc_orsf, time + status ~ . - id,
-                control = orsf_control_custom(beta_fun = f_aorsf),
+                control = orsf_control_survival(method = f_aorsf),
                 tree_seeds = 329)
 }\if{html}{\out{</div>}}
 
@@ -571,6 +578,7 @@ if that happens!
 Start with a recipe to pre-process data
 
 \if{html}{\out{<div class="sourceCode r">}}\preformatted{imputer <- recipe(pbc_orsf, formula = time + status ~ .) \%>\%
+  step_rm(id) \%>\%
   step_impute_mean(all_numeric_predictors()) \%>\%
   step_impute_mode(all_nominal_predictors())
 }\if{html}{\out{
}} @@ -629,7 +637,9 @@ aorsf_wf <- function(train, test, pred_horizon)\{ train \%>\% orsf(Surv(time, status) ~ .,) \%>\% - predict(new_data = test, pred_horizon = pred_horizon) \%>\% + predict(new_data = test, + pred_type = 'risk', + pred_horizon = pred_horizon) \%>\% as.numeric() \} diff --git a/man/orsf_control_cph.Rd b/man/orsf_control_cph.Rd index c1a38efb..6f09a551 100644 --- a/man/orsf_control_cph.Rd +++ b/man/orsf_control_cph.Rd @@ -44,13 +44,6 @@ was modified to make this routine. For more details on the Cox proportional hazards model, see \link[survival:coxph]{coxph} and/or Therneau and Grambsch (2000). -} -\examples{ - -orsf(data = pbc_orsf, - formula = Surv(time, status) ~ . - id, - control = orsf_control_cph()) - } \references{ Therneau T.M., Grambsch P.M. (2000) The Cox Model. In: Modeling Survival diff --git a/man/orsf_control_fast.Rd b/man/orsf_control_fast.Rd index 9440bb97..2365bbaf 100644 --- a/man/orsf_control_fast.Rd +++ b/man/orsf_control_fast.Rd @@ -35,13 +35,6 @@ was modified to make this routine. Adjust \code{do_scale} \emph{at your own risk}. Setting \code{do_scale = FALSE} will reduce computation time but will also make the \code{orsf} model dependent on the scale of your data, which is why the default value is \code{TRUE}. -} -\examples{ - -orsf(data = pbc_orsf, - formula = Surv(time, status) ~ . - id, - control = orsf_control_fast()) - } \seealso{ linear combination control functions diff --git a/man/orsf_control_net.Rd b/man/orsf_control_net.Rd index 427c7ad7..51bf1af0 100644 --- a/man/orsf_control_net.Rd +++ b/man/orsf_control_net.Rd @@ -31,17 +31,6 @@ combinations of input variables while fitting an \link{orsf} model. \link{orsf} that indicates the number of variables chosen at random prior to finding a linear combination of those variables. } -\examples{ - -# orsf_control_net() is considerably slower than orsf_control_cph(), -# The example uses n_tree = 25 so that my examples run faster, -# but you should use at least 500 trees in applied settings. - -orsf(data = pbc_orsf, - formula = Surv(time, status) ~ . - id, - n_tree = 25, - control = orsf_control_net()) -} \references{ Simon N, Friedman J, Hastie T, Tibshirani R. Regularization paths for Cox's proportional hazards model via coordinate descent. \emph{Journal of statistical software} 2011 Mar; 39(5):1. DOI: 10.18637/jss.v039.i05 } diff --git a/tests/testthat/test-orsf.R b/tests/testthat/test-orsf.R index d93121ab..61aacd60 100644 --- a/tests/testthat/test-orsf.R +++ b/tests/testthat/test-orsf.R @@ -110,18 +110,18 @@ test_that( pbc_list_bad$trt <- pbc_list_bad$trt[1:3] pbc_list_bad$age <- pbc_list_bad$age[1:5] - skip_on_cran() # I don't want to list recipes in suggests - - recipe <- recipes::recipe(pbc_orsf, formula = time + status ~ .) %>% - recipes::step_rm(id) - - recipe_prepped <- recipes::prep(recipe) - - fit_recipe <- orsf(recipe_prepped, Surv(time, status) ~ ., - n_tree = n_tree_test, - tree_seeds = seeds_standard) - - expect_equal_leaf_summary(fit_recipe, fit_standard_pbc$fast) + # skip() # I don't want to list recipes in suggests + # + # recipe <- recipes::recipe(pbc_orsf, formula = time + status ~ .) %>% + # recipes::step_rm(id) + # + # recipe_prepped <- recipes::prep(recipe) + # + # fit_recipe <- orsf(recipe_prepped, Surv(time, status) ~ ., + # n_tree = n_tree_test, + # tree_seeds = seeds_standard) + # + # expect_equal_leaf_summary(fit_recipe, fit_standard_pbc$fast) fit_list <- orsf(pbc_list, Surv(time, status) ~ . 
- id,
@@ -388,45 +388,45 @@ test_that(
     as.numeric(fit$eval_oobag$stat_values)
   )
 
-  skip_on_cran() # don't want to suggest yardstick or Hmisc
-
-  oobag_rsq_eval <- function(y_mat, w_vec, s_vec){
-
-   yardstick::rsq_trad_vec(truth = as.numeric(y_mat),
-                           estimate = as.numeric(s_vec),
-                           case_weights = as.numeric(w_vec))
-  }
-
-  fit <- orsf(data = mtcars,
-              formula = mpg ~ .,
-              n_tree = n_tree_test,
-              oobag_fun = oobag_rsq_eval,
-              tree_seeds = seeds_standard)
-
-  expect_equal(
-   fit$eval_oobag$stat_values[1,1],
-   yardstick::rsq_trad_vec(truth = as.numeric(mtcars$mpg),
-                           estimate = as.numeric(fit$pred_oobag),
-                           case_weights = rep(1, nrow(mtcars)))
-  )
-
-  oobag_cstat_clsf <- function(y_mat, w_vec, s_vec){
-
-   y_vec = as.numeric(y_mat)
-   cstat <- Hmisc::somers2(x = s_vec,
-                           y = y_vec,
-                           weights = w_vec)['C']
-   cstat
-
-  }
-
-  fit <- orsf(data = penguins,
-              formula = species ~ .,
-              n_tree = n_tree_test,
-              oobag_fun = oobag_cstat_clsf,
-              tree_seeds = seeds_standard)
-
-  expect_equal_oobag_eval(fit, fit_standard_penguins$fast)
+  # skip() # don't want to suggest yardstick or Hmisc
+  #
+  # oobag_rsq_eval <- function(y_mat, w_vec, s_vec){
+  #
+  #  yardstick::rsq_trad_vec(truth = as.numeric(y_mat),
+  #                          estimate = as.numeric(s_vec),
+  #                          case_weights = as.numeric(w_vec))
+  # }
+  #
+  # fit <- orsf(data = mtcars,
+  #             formula = mpg ~ .,
+  #             n_tree = n_tree_test,
+  #             oobag_fun = oobag_rsq_eval,
+  #             tree_seeds = seeds_standard)
+  #
+  # expect_equal(
+  #  fit$eval_oobag$stat_values[1,1],
+  #  yardstick::rsq_trad_vec(truth = as.numeric(mtcars$mpg),
+  #                          estimate = as.numeric(fit$pred_oobag),
+  #                          case_weights = rep(1, nrow(mtcars)))
+  # )
+  #
+  # oobag_cstat_clsf <- function(y_mat, w_vec, s_vec){
+  #
+  #  y_vec = as.numeric(y_mat)
+  #  cstat <- Hmisc::somers2(x = s_vec,
+  #                          y = y_vec,
+  #                          weights = w_vec)['C']
+  #  cstat
+  #
+  # }
+  #
+  # fit <- orsf(data = penguins,
+  #             formula = species ~ .,
+  #             n_tree = n_tree_test,
+  #             oobag_fun = oobag_cstat_clsf,
+  #             tree_seeds = seeds_standard)
+  #
+  # expect_equal_oobag_eval(fit, fit_standard_penguins$fast)
 
 }
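
For reference, a minimal usage sketch of the `orsf_control_survival()` interface that this patch migrates the documentation and examples to. Every argument name used below (`method`, `scale_x`, `max_iter`, `target_df`) and the `pbc_orsf` data come directly from the diff; the snippet assumes a development version of aorsf that exports `orsf_control_survival()`, and it is not part of the patch itself.

```r
library(aorsf)
library(survival)

# replaces orsf_control_fast() in the examples above (default control)
control_fast <- orsf_control_survival()

# replaces orsf_control_cph() in the examples above
control_cph <- orsf_control_survival(method = 'glm',
                                     scale_x = TRUE,
                                     max_iter = 20)

# replaces orsf_control_net(df_target = 3) in the examples above
control_net <- orsf_control_survival(method = 'net', target_df = 3)

fit <- orsf(pbc_orsf,
            formula = Surv(time, status) ~ . - id,
            control = control_net,
            n_tree = 25, # use at least 500 trees in applied settings
            tree_seeds = 329)
```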