From 233e4fa11b7231f115aff16e2806efe6e0851c94 Mon Sep 17 00:00:00 2001 From: byron jaeger Date: Sun, 12 Nov 2023 22:24:25 -0500 Subject: [PATCH] fixup for cran check --- R/orsf_control.R | 20 -------- Rmd/orsf_examples.Rmd | 24 ++++++--- man/orsf.Rd | 26 +++++++--- man/orsf_control_cph.Rd | 7 --- man/orsf_control_fast.Rd | 7 --- man/orsf_control_net.Rd | 11 ---- tests/testthat/test-orsf.R | 102 ++++++++++++++++++------------------- 7 files changed, 86 insertions(+), 111 deletions(-) diff --git a/R/orsf_control.R b/R/orsf_control.R index 95f3878e..0875371c 100644 --- a/R/orsf_control.R +++ b/R/orsf_control.R @@ -36,11 +36,6 @@ #' on the scale of your data, which is why the default value is `TRUE`. #' #' -#' @examples -#' -#' orsf(data = pbc_orsf, -#' formula = Surv(time, status) ~ . - id, -#' control = orsf_control_fast()) #' orsf_control_fast <- function(method = 'efron', do_scale = TRUE, @@ -115,11 +110,6 @@ orsf_control_fast <- function(method = 'efron', #' Data: Extending the Cox Model. Statistics for Biology and Health. #' Springer, New York, NY. DOI: 10.1007/978-1-4757-3294-8_3 #' -#' @examples -#' -#' orsf(data = pbc_orsf, -#' formula = Surv(time, status) ~ . - id, -#' control = orsf_control_cph()) #' orsf_control_cph <- function(method = 'efron', eps = 1e-9, @@ -186,16 +176,6 @@ orsf_control_cph <- function(method = 'efron', #' #' `r roxy_cite_simon_2011()` #' -#' @examples -#' -#' # orsf_control_net() is considerably slower than orsf_control_cph(), -#' # The example uses n_tree = 25 so that my examples run faster, -#' # but you should use at least 500 trees in applied settings. -#' -#' orsf(data = pbc_orsf, -#' formula = Surv(time, status) ~ . - id, -#' n_tree = 25, -#' control = orsf_control_net()) orsf_control_net <- function(alpha = 1/2, df_target = NULL, diff --git a/Rmd/orsf_examples.Rmd b/Rmd/orsf_examples.Rmd index cd97e7c2..36921682 100644 --- a/Rmd/orsf_examples.Rmd +++ b/Rmd/orsf_examples.Rmd @@ -50,7 +50,7 @@ The accelerated ORSF ensemble is the default because it has a nice balance of co ```{r} fit_accel <- orsf(pbc_orsf, - control = orsf_control_fast(), + control = orsf_control_survival(), formula = Surv(time, status) ~ . - id, tree_seeds = 329) @@ -62,8 +62,12 @@ fit_accel <- orsf(pbc_orsf, ```{r} +control_cph <- orsf_control_survival(method = 'glm', + scale_x = TRUE, + max_iter = 20) + fit_cph <- orsf(pbc_orsf, - control = orsf_control_cph(), + control = control_cph, formula = Surv(time, status) ~ . - id, tree_seeds = 329) @@ -77,8 +81,11 @@ fit_cph <- orsf(pbc_orsf, # select 3 predictors out of 5 to be used in # each linear combination of predictors. + +control_net <- orsf_control_survival(method = 'net', target_df = 3) + fit_net <- orsf(pbc_orsf, - control = orsf_control_net(df_target = 3), + control = control_net, formula = Surv(time, status) ~ . - id, tree_seeds = 329) @@ -157,16 +164,16 @@ We can plug these functions into `orsf_control_custom()`, and then pass the resu fit_rando <- orsf(pbc_orsf, Surv(time, status) ~ . - id, - control = orsf_control_custom(beta_fun = f_rando), + control = orsf_control_survival(method = f_rando), tree_seeds = 329) fit_pca <- orsf(pbc_orsf, Surv(time, status) ~ . - id, - control = orsf_control_custom(beta_fun = f_pca), + control = orsf_control_survival(method = f_pca), tree_seeds = 329) fit_rlt <- orsf(pbc_orsf, time + status ~ . 
- id, - control = orsf_control_custom(beta_fun = f_aorsf), + control = orsf_control_survival(method = f_aorsf), tree_seeds = 329) ``` @@ -221,6 +228,7 @@ Start with a recipe to pre-process data ```{r} imputer <- recipe(pbc_orsf, formula = time + status ~ .) %>% + step_rm(id) %>% step_impute_mean(all_numeric_predictors()) %>% step_impute_mode(all_nominal_predictors()) @@ -268,7 +276,9 @@ aorsf_wf <- function(train, test, pred_horizon){ train %>% orsf(Surv(time, status) ~ .,) %>% - predict(new_data = test, pred_horizon = pred_horizon) %>% + predict(new_data = test, + pred_type = 'risk', + pred_horizon = pred_horizon) %>% as.numeric() } diff --git a/man/orsf.Rd b/man/orsf.Rd index f6a74d23..3f3a43ec 100644 --- a/man/orsf.Rd +++ b/man/orsf.Rd @@ -389,7 +389,7 @@ iteration of Newton Raphson scoring on the Cox partial likelihood function to find linear combinations of predictors. \if{html}{\out{
}}\preformatted{fit_accel <- orsf(pbc_orsf,
-                  control = orsf_control_fast(),
+                  control = orsf_control_survival(),
                   formula = Surv(time, status) ~ . - id,
                   tree_seeds = 329)
 }\if{html}{\out{</div>}}
@@ -401,8 +401,12 @@ function to find linear combinations of predictors.
 survival tree, using the regression coefficients to create linear
 combinations of predictors:
 
-\if{html}{\out{<div class="sourceCode r">}}\preformatted{fit_cph <- orsf(pbc_orsf,
-                control = orsf_control_cph(),
+\if{html}{\out{<div class="sourceCode r">}}\preformatted{control_cph <- orsf_control_survival(method = 'glm',
+                                     scale_x = TRUE,
+                                     max_iter = 20)
+
+fit_cph <- orsf(pbc_orsf,
+                control = control_cph,
                 formula = Surv(time, status) ~ . - id,
                 tree_seeds = 329)
 }\if{html}{\out{</div>}}
@@ -418,8 +422,11 @@ than the other options.
 
 \if{html}{\out{<div class="sourceCode r">}}\preformatted{# select 3 predictors out of 5 to be used in
 # each linear combination of predictors.
+
+control_net <- orsf_control_survival(method = 'net', target_df = 3)
+
 fit_net <- orsf(pbc_orsf,
-                control = orsf_control_net(df_target = 3),
+                control = control_net,
                 formula = Surv(time, status) ~ . - id,
                 tree_seeds = 329)
 }\if{html}{\out{</div>}}
@@ -489,16 +496,16 @@ the result into \code{orsf()}:
 
 \if{html}{\out{<div class="sourceCode r">}}\preformatted{fit_rando <- orsf(pbc_orsf,
                   Surv(time, status) ~ . - id,
-                  control = orsf_control_custom(beta_fun = f_rando),
+                  control = orsf_control_survival(method = f_rando),
                   tree_seeds = 329)
 
 fit_pca <- orsf(pbc_orsf,
                 Surv(time, status) ~ . - id,
-                control = orsf_control_custom(beta_fun = f_pca),
+                control = orsf_control_survival(method = f_pca),
                 tree_seeds = 329)
 
 fit_rlt <- orsf(pbc_orsf, time + status ~ . - id,
-                control = orsf_control_custom(beta_fun = f_aorsf),
+                control = orsf_control_survival(method = f_aorsf),
                 tree_seeds = 329)
 }\if{html}{\out{</div>}}
 
@@ -571,6 +578,7 @@ if that happens!
 Start with a recipe to pre-process data
 
 \if{html}{\out{<div class="sourceCode r">}}\preformatted{imputer <- recipe(pbc_orsf, formula = time + status ~ .) \%>\%
+  step_rm(id) \%>\%
   step_impute_mean(all_numeric_predictors()) \%>\%
   step_impute_mode(all_nominal_predictors())
 }\if{html}{\out{
}} @@ -629,7 +637,9 @@ aorsf_wf <- function(train, test, pred_horizon)\{ train \%>\% orsf(Surv(time, status) ~ .,) \%>\% - predict(new_data = test, pred_horizon = pred_horizon) \%>\% + predict(new_data = test, + pred_type = 'risk', + pred_horizon = pred_horizon) \%>\% as.numeric() \} diff --git a/man/orsf_control_cph.Rd b/man/orsf_control_cph.Rd index c1a38efb..6f09a551 100644 --- a/man/orsf_control_cph.Rd +++ b/man/orsf_control_cph.Rd @@ -44,13 +44,6 @@ was modified to make this routine. For more details on the Cox proportional hazards model, see \link[survival:coxph]{coxph} and/or Therneau and Grambsch (2000). -} -\examples{ - -orsf(data = pbc_orsf, - formula = Surv(time, status) ~ . - id, - control = orsf_control_cph()) - } \references{ Therneau T.M., Grambsch P.M. (2000) The Cox Model. In: Modeling Survival diff --git a/man/orsf_control_fast.Rd b/man/orsf_control_fast.Rd index 9440bb97..2365bbaf 100644 --- a/man/orsf_control_fast.Rd +++ b/man/orsf_control_fast.Rd @@ -35,13 +35,6 @@ was modified to make this routine. Adjust \code{do_scale} \emph{at your own risk}. Setting \code{do_scale = FALSE} will reduce computation time but will also make the \code{orsf} model dependent on the scale of your data, which is why the default value is \code{TRUE}. -} -\examples{ - -orsf(data = pbc_orsf, - formula = Surv(time, status) ~ . - id, - control = orsf_control_fast()) - } \seealso{ linear combination control functions diff --git a/man/orsf_control_net.Rd b/man/orsf_control_net.Rd index 427c7ad7..51bf1af0 100644 --- a/man/orsf_control_net.Rd +++ b/man/orsf_control_net.Rd @@ -31,17 +31,6 @@ combinations of input variables while fitting an \link{orsf} model. \link{orsf} that indicates the number of variables chosen at random prior to finding a linear combination of those variables. } -\examples{ - -# orsf_control_net() is considerably slower than orsf_control_cph(), -# The example uses n_tree = 25 so that my examples run faster, -# but you should use at least 500 trees in applied settings. - -orsf(data = pbc_orsf, - formula = Surv(time, status) ~ . - id, - n_tree = 25, - control = orsf_control_net()) -} \references{ Simon N, Friedman J, Hastie T, Tibshirani R. Regularization paths for Cox's proportional hazards model via coordinate descent. \emph{Journal of statistical software} 2011 Mar; 39(5):1. DOI: 10.18637/jss.v039.i05 } diff --git a/tests/testthat/test-orsf.R b/tests/testthat/test-orsf.R index d93121ab..61aacd60 100644 --- a/tests/testthat/test-orsf.R +++ b/tests/testthat/test-orsf.R @@ -110,18 +110,18 @@ test_that( pbc_list_bad$trt <- pbc_list_bad$trt[1:3] pbc_list_bad$age <- pbc_list_bad$age[1:5] - skip_on_cran() # I don't want to list recipes in suggests - - recipe <- recipes::recipe(pbc_orsf, formula = time + status ~ .) %>% - recipes::step_rm(id) - - recipe_prepped <- recipes::prep(recipe) - - fit_recipe <- orsf(recipe_prepped, Surv(time, status) ~ ., - n_tree = n_tree_test, - tree_seeds = seeds_standard) - - expect_equal_leaf_summary(fit_recipe, fit_standard_pbc$fast) + # skip() # I don't want to list recipes in suggests + # + # recipe <- recipes::recipe(pbc_orsf, formula = time + status ~ .) %>% + # recipes::step_rm(id) + # + # recipe_prepped <- recipes::prep(recipe) + # + # fit_recipe <- orsf(recipe_prepped, Surv(time, status) ~ ., + # n_tree = n_tree_test, + # tree_seeds = seeds_standard) + # + # expect_equal_leaf_summary(fit_recipe, fit_standard_pbc$fast) fit_list <- orsf(pbc_list, Surv(time, status) ~ . 
- id,
@@ -388,45 +388,45 @@ test_that(
     as.numeric(fit$eval_oobag$stat_values)
   )
 
-  skip_on_cran() # don't want to suggest yardstick or Hmisc
-
-  oobag_rsq_eval <- function(y_mat, w_vec, s_vec){
-
-   yardstick::rsq_trad_vec(truth = as.numeric(y_mat),
-                           estimate = as.numeric(s_vec),
-                           case_weights = as.numeric(w_vec))
-  }
-
-  fit <- orsf(data = mtcars,
-              formula = mpg ~ .,
-              n_tree = n_tree_test,
-              oobag_fun = oobag_rsq_eval,
-              tree_seeds = seeds_standard)
-
-  expect_equal(
-   fit$eval_oobag$stat_values[1,1],
-   yardstick::rsq_trad_vec(truth = as.numeric(mtcars$mpg),
-                           estimate = as.numeric(fit$pred_oobag),
-                           case_weights = rep(1, nrow(mtcars)))
-  )
-
-  oobag_cstat_clsf <- function(y_mat, w_vec, s_vec){
-
-   y_vec = as.numeric(y_mat)
-   cstat <- Hmisc::somers2(x = s_vec,
-                           y = y_vec,
-                           weights = w_vec)['C']
-   cstat
-
-  }
-
-  fit <- orsf(data = penguins,
-              formula = species ~ .,
-              n_tree = n_tree_test,
-              oobag_fun = oobag_cstat_clsf,
-              tree_seeds = seeds_standard)
-
-  expect_equal_oobag_eval(fit, fit_standard_penguins$fast)
+  # skip() # don't want to suggest yardstick or Hmisc
+  #
+  # oobag_rsq_eval <- function(y_mat, w_vec, s_vec){
+  #
+  #  yardstick::rsq_trad_vec(truth = as.numeric(y_mat),
+  #                          estimate = as.numeric(s_vec),
+  #                          case_weights = as.numeric(w_vec))
+  # }
+  #
+  # fit <- orsf(data = mtcars,
+  #             formula = mpg ~ .,
+  #             n_tree = n_tree_test,
+  #             oobag_fun = oobag_rsq_eval,
+  #             tree_seeds = seeds_standard)
+  #
+  # expect_equal(
+  #  fit$eval_oobag$stat_values[1,1],
+  #  yardstick::rsq_trad_vec(truth = as.numeric(mtcars$mpg),
+  #                          estimate = as.numeric(fit$pred_oobag),
+  #                          case_weights = rep(1, nrow(mtcars)))
+  # )
+  #
+  # oobag_cstat_clsf <- function(y_mat, w_vec, s_vec){
+  #
+  #  y_vec = as.numeric(y_mat)
+  #  cstat <- Hmisc::somers2(x = s_vec,
+  #                          y = y_vec,
+  #                          weights = w_vec)['C']
+  #  cstat
+  #
+  # }
+  #
+  # fit <- orsf(data = penguins,
+  #             formula = species ~ .,
+  #             n_tree = n_tree_test,
+  #             oobag_fun = oobag_cstat_clsf,
+  #             tree_seeds = seeds_standard)
+  #
+  # expect_equal_oobag_eval(fit, fit_standard_penguins$fast)
 
 }
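
For reference, a minimal usage sketch of the `orsf_control_survival()` interface that this patch migrates the documentation and examples to. Every argument name used below (`method`, `scale_x`, `max_iter`, `target_df`) and the `pbc_orsf` data come directly from the diff; the snippet assumes a development version of aorsf that exports `orsf_control_survival()`, and it is not part of the patch itself.

```r
library(aorsf)
library(survival)

# replaces orsf_control_fast() in the examples above (default control)
control_fast <- orsf_control_survival()

# replaces orsf_control_cph() in the examples above
control_cph <- orsf_control_survival(method = 'glm',
                                     scale_x = TRUE,
                                     max_iter = 20)

# replaces orsf_control_net(df_target = 3) in the examples above
control_net <- orsf_control_survival(method = 'net', target_df = 3)

fit <- orsf(pbc_orsf,
            formula = Surv(time, status) ~ . - id,
            control = control_net,
            n_tree = 25, # use at least 500 trees in applied settings
            tree_seeds = 329)
```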