diff --git a/Rmd/orsf-fit-intro.Rmd b/Rmd/orsf-fit-intro.Rmd index c10a536a..a851205d 100644 --- a/Rmd/orsf-fit-intro.Rmd +++ b/Rmd/orsf-fit-intro.Rmd @@ -23,7 +23,7 @@ bill_fit ``` -My personal favorite is the oblique survival RF with accelerated Cox regression because it was the first type of oblique RF that `aorsf` provided (see [JCGS paper](https://www.tandfonline.com/doi/full/10.1080/10618600.2023.2231048)). Here, we use it to predict mortality risk following diagnosis of primary biliary cirrhosis: +My personal favorite is the oblique survival RF with accelerated Cox regression because it was the first type of oblique RF that `aorsf` provided (see [arXiv paper](https://arxiv.org/abs/2208.01129); the paper is also published in *Journal of Computational and Graphical Statistics* but is not publicly available there). Here, we use it to predict mortality risk following diagnosis of primary biliary cirrhosis: ```{r} # An oblique survival RF diff --git a/man/orsf.Rd b/man/orsf.Rd index 04c5210c..311d5ec7 100644 --- a/man/orsf.Rd +++ b/man/orsf.Rd @@ -388,7 +388,7 @@ penguin_fit ## N trees: 5 ## N predictors total: 7 ## N predictors per node: 3 -## Average leaves per tree: 5.8 +## Average leaves per tree: 5.6 ## Min observations in leaf: 5 ## OOB stat value: 0.98 ## OOB stat type: AUC-ROC @@ -415,9 +415,9 @@ bill_fit ## N trees: 5 ## N predictors total: 7 ## N predictors per node: 3 -## Average leaves per tree: 52.2 +## Average leaves per tree: 50.8 ## Min observations in leaf: 5 -## OOB stat value: 0.75 +## OOB stat value: 0.74 ## OOB stat type: RSQ ## Variable importance: anova ## @@ -426,9 +426,10 @@ bill_fit My personal favorite is the oblique survival RF with accelerated Cox regression because it was the first type of oblique RF that \code{aorsf} -provided (see \href{https://www.tandfonline.com/doi/full/10.1080/10618600.2023.2231048}{JCGS paper}). 
-Here, we use it to predict mortality risk following diagnosis of primary -biliary cirrhosis: +provided (see \href{https://arxiv.org/abs/2208.01129}{arXiv paper}; the paper +is also published in \emph{Journal of Computational and Graphical Statistics} +but is not publicly available there). Here, we use it to predict +mortality risk following diagnosis of primary biliary cirrhosis: \if{html}{\out{
}}\preformatted{# An oblique survival RF pbc_fit <- orsf(data = pbc_orsf, @@ -449,7 +450,7 @@ pbc_fit ## Average leaves per tree: 21.6 ## Min observations in leaf: 5 ## Min events in leaf: 1 -## OOB stat value: 0.75 +## OOB stat value: 0.78 ## OOB stat type: Harrell's C-index ## Variable importance: anova ## @@ -496,7 +497,7 @@ take to fit the forest before you commit to it: orsf_time_to_train() }\if{html}{\out{
}} -\if{html}{\out{
}}\preformatted{## Time difference of 2.182388 secs +\if{html}{\out{
}}\preformatted{## Time difference of 1.9964 secs }\if{html}{\out{
}} \enumerate{ \item If fitting multiple forests, use the blueprint along with @@ -567,11 +568,11 @@ brier_scores \if{html}{\out{
}}\preformatted{## # A tibble: 6 x 4 ## .metric .estimator .eval_time .estimate ## -## 1 brier_survival standard 500 0.0568 -## 2 brier_survival standard 1000 0.0932 -## 3 brier_survival standard 1500 0.0942 -## 4 brier_survival standard 2000 0.0949 -## 5 brier_survival standard 2500 0.135 +## 1 brier_survival standard 500 0.0452 +## 2 brier_survival standard 1000 0.0699 +## 3 brier_survival standard 1500 0.0865 +## 4 brier_survival standard 2000 0.0930 +## 5 brier_survival standard 2500 0.127 ## 6 brier_survival standard 3000 0.184 }\if{html}{\out{
}} @@ -584,12 +585,12 @@ roc_scores \if{html}{\out{
}}\preformatted{## # A tibble: 6 x 4 ## .metric .estimator .eval_time .estimate ## -## 1 roc_auc_survival standard 500 0.916 -## 2 roc_auc_survival standard 1000 0.906 -## 3 roc_auc_survival standard 1500 0.937 -## 4 roc_auc_survival standard 2000 0.954 -## 5 roc_auc_survival standard 2500 0.909 -## 6 roc_auc_survival standard 3000 0.855 +## 1 roc_auc_survival standard 500 0.984 +## 2 roc_auc_survival standard 1000 0.918 +## 3 roc_auc_survival standard 1500 0.912 +## 4 roc_auc_survival standard 2000 0.923 +## 5 roc_auc_survival standard 2500 0.904 +## 6 roc_auc_survival standard 3000 0.842 }\if{html}{\out{
}} } } diff --git a/man/orsf_time_to_train.Rd b/man/orsf_time_to_train.Rd index 855c922a..09f5ffd5 100644 --- a/man/orsf_time_to_train.Rd +++ b/man/orsf_time_to_train.Rd @@ -4,14 +4,15 @@ \alias{orsf_time_to_train} \title{Estimate training time} \usage{ -orsf_time_to_train(object, n_tree_subset = 50) +orsf_time_to_train(object, n_tree_subset = NULL) } \arguments{ \item{object}{an untrained \code{aorsf} object} \item{n_tree_subset}{(\emph{integer}) how many trees should be fit in order -to estimate the time needed to train \code{object}. The default value is 50, -as this usually gives a good enough approximation.} +to estimate the time needed to train \code{object}. The default value is 10\% +of the trees specified in \code{object}. I.e., if \code{object} has \code{n_tree} of +500, then the default value of \code{n_tree_subset} is 50.} } \value{ a \link{difftime} object. @@ -25,8 +26,8 @@ Estimate training time object <- orsf(pbc_orsf, Surv(time, status) ~ . - id, n_tree = 10, no_fit = TRUE) -# approximate the time it will take to grow 500 trees -time_estimated <- orsf_time_to_train(object) +# approximate the time it will take to grow 10 trees +time_estimated <- orsf_time_to_train(object, n_tree_subset=1) print(time_estimated) diff --git a/man/orsf_update.Rd b/man/orsf_update.Rd index f0f7d9c8..a3410e68 100644 --- a/man/orsf_update.Rd +++ b/man/orsf_update.Rd @@ -77,6 +77,7 @@ that would occur if the dynamic outputs were not re-initialized. } \examples{ +\dontrun{ # initial fit has mtry of 5 fit <- orsf(pbc_orsf, time + status ~ . -id) @@ -85,6 +86,7 @@ fit_new <- orsf_update(fit, formula = . ~ . - edema, n_tree = 100) # prevent dynamic updates by specifying inputs you want to freeze. fit_newer <- orsf_update(fit_new, mtry = 2) +} }