diff --git a/R/bibentries.R b/R/bibentries.R index 7bf1ed35..0b72ceb2 100644 --- a/R/bibentries.R +++ b/R/bibentries.R @@ -13,13 +13,12 @@ cite = function(entry){ volume = "45", number = "1", pages = "5--32", - doi = "10.1023/A:1010933404324", + # doi = "10.1023/A:1010933404324", issn = "1573-0565" ), ishwaran_2008 = utils::bibentry( "article", - doi = "10.1214/08-aoas169", - url = "https://doi.org/10.1214/08-aoas169", + # doi = "10.1214/08-aoas169", year = "2008", month = "9", publisher = "Institute of Mathematical Statistics", @@ -31,7 +30,7 @@ cite = function(entry){ ), jaeger_2019 = utils::bibentry( "article", - doi = "10.1214/19-aoas1261", + # doi = "10.1214/19-aoas1261", year = "2019", month = "9", publisher = "Institute of Mathematical Statistics", @@ -46,7 +45,7 @@ cite = function(entry){ title = "Accelerated and interpretable oblique random survival forests", author = "Byron C. Jaeger and Sawyer Welden and Kristin Lenoir and Jaime L. Speiser and Matthew W. Segar and Ambarish Pandey and Nicholas M. Pajewski", journal = "Journal of Computational and Graphical Statistics", - doi = "10.1080/10618600.2023.2231048", + # doi = "10.1080/10618600.2023.2231048", year = "2023", month = "8", publisher = "Taylor & Francis", diff --git a/Rmd/orsf-fit-intro.Rmd b/Rmd/orsf-fit-intro.Rmd index 1d20ebe9..c10a536a 100644 --- a/Rmd/orsf-fit-intro.Rmd +++ b/Rmd/orsf-fit-intro.Rmd @@ -23,7 +23,7 @@ bill_fit ``` -My personal favorite is the oblique survival RF with accelerated Cox regression because it was the first type of oblique RF that `aorsf` provided (see [JCGS paper](https://doi.org/10.1080/10618600.2023.2231048)). Here, we use it to predict mortality risk following diagnosis of primary biliary cirrhosis: +My personal favorite is the oblique survival RF with accelerated Cox regression because it was the first type of oblique RF that `aorsf` provided (see [JCGS paper](https://www.tandfonline.com/doi/full/10.1080/10618600.2023.2231048)). Here, we use it to predict mortality risk following diagnosis of primary biliary cirrhosis: ```{r} # An oblique survival RF diff --git a/man/orsf.Rd b/man/orsf.Rd index 51de151a..9d08ae14 100644 --- a/man/orsf.Rd +++ b/man/orsf.Rd @@ -388,9 +388,9 @@ penguin_fit ## N trees: 5 ## N predictors total: 7 ## N predictors per node: 3 -## Average leaves per tree: 6 +## Average leaves per tree: 5.8 ## Min observations in leaf: 5 -## OOB stat value: 0.98 +## OOB stat value: 0.99 ## OOB stat type: AUC-ROC ## Variable importance: anova ## @@ -415,9 +415,9 @@ bill_fit ## N trees: 5 ## N predictors total: 7 ## N predictors per node: 3 -## Average leaves per tree: 49.4 +## Average leaves per tree: 50.6 ## Min observations in leaf: 5 -## OOB stat value: 0.72 +## OOB stat value: 0.71 ## OOB stat type: RSQ ## Variable importance: anova ## @@ -426,9 +426,9 @@ bill_fit My personal favorite is the oblique survival RF with accelerated Cox regression because it was the first type of oblique RF that \code{aorsf} -provided (see \href{https://doi.org/10.1080/10618600.2023.2231048}{JCGS paper}). Here, we use it -to predict mortality risk following diagnosis of primary biliary -cirrhosis: +provided (see \href{https://www.tandfonline.com/doi/full/10.1080/10618600.2023.2231048}{JCGS paper}). +Here, we use it to predict mortality risk following diagnosis of primary +biliary cirrhosis: \if{html}{\out{
}}\preformatted{# An oblique survival RF pbc_fit <- orsf(data = pbc_orsf, @@ -446,7 +446,7 @@ pbc_fit ## N trees: 5 ## N predictors total: 17 ## N predictors per node: 5 -## Average leaves per tree: 20.2 +## Average leaves per tree: 20 ## Min observations in leaf: 5 ## Min events in leaf: 1 ## OOB stat value: 0.79 @@ -496,7 +496,7 @@ take to fit the forest before you commit to it: orsf_time_to_train() }\if{html}{\out{
}} -\if{html}{\out{
}}\preformatted{## Time difference of 3.799009 secs +\if{html}{\out{
}}\preformatted{## Time difference of 4.861784 secs }\if{html}{\out{
}} \enumerate{ \item If fitting multiple forests, use the blueprint along with @@ -567,12 +567,12 @@ brier_scores \if{html}{\out{
}}\preformatted{## # A tibble: 6 x 4 ## .metric .estimator .eval_time .estimate ## -## 1 brier_survival standard 500 0.0339 -## 2 brier_survival standard 1000 0.108 -## 3 brier_survival standard 1500 0.112 -## 4 brier_survival standard 2000 0.120 -## 5 brier_survival standard 2500 0.105 -## 6 brier_survival standard 3000 0.135 +## 1 brier_survival standard 500 0.0522 +## 2 brier_survival standard 1000 0.111 +## 3 brier_survival standard 1500 0.125 +## 4 brier_survival standard 2000 0.102 +## 5 brier_survival standard 2500 0.196 +## 6 brier_survival standard 3000 0.240 }\if{html}{\out{
}} \if{html}{\out{
}}\preformatted{roc_scores <- test_pred \%>\% @@ -584,12 +584,12 @@ roc_scores \if{html}{\out{
}}\preformatted{## # A tibble: 6 x 4 ## .metric .estimator .eval_time .estimate ## -## 1 roc_auc_survival standard 500 0.853 -## 2 roc_auc_survival standard 1000 0.889 -## 3 roc_auc_survival standard 1500 0.877 -## 4 roc_auc_survival standard 2000 0.906 -## 5 roc_auc_survival standard 2500 0.897 -## 6 roc_auc_survival standard 3000 0.891 +## 1 roc_auc_survival standard 500 0.947 +## 2 roc_auc_survival standard 1000 0.846 +## 3 roc_auc_survival standard 1500 0.885 +## 4 roc_auc_survival standard 2000 0.927 +## 5 roc_auc_survival standard 2500 0.807 +## 6 roc_auc_survival standard 3000 0.787 }\if{html}{\out{
}} } } @@ -599,12 +599,9 @@ roc_scores \item Harrell, E F, Califf, M R, Pryor, B D, Lee, L K, Rosati, A R (1982). "Evaluating the yield of medical tests." \emph{Jama}, \emph{247}(18), 2543-2546. \item Breiman, Leo (2001). "Random Forests." \emph{Machine Learning}, \emph{45}(1), -5-32. ISSN 1573-0565, doi:10.1023/A:1010933404324 -\url{https://doi.org/10.1023/A\%3A1010933404324}. +5-32. ISSN 1573-0565. \item Ishwaran H, Kogalur UB, Blackstone EH, Lauer MS (2008). "Random survival forests." \emph{The Annals of Applied Statistics}, \emph{2}(3). -doi:10.1214/08-aoas169 \url{https://doi.org/10.1214/08-aoas169}, -\url{https://doi.org/10.1214/08-aoas169}. \item Menze, H B, Kelm, Michael B, Splitthoff, N D, Koethe, Ullrich, Hamprecht, A F (2011). "On oblique random forests." In \emph{Machine Learning and Knowledge Discovery in Databases: European Conference, @@ -612,12 +609,9 @@ ECML PKDD 2011, Athens, Greece, September 5-9, 2011, Proceedings, Part II 22}, 453-469. Springer. \item Jaeger BC, Long DL, Long DM, Sims M, Szychowski JM, Min Y, Mcclure LA, Howard G, Simon N (2019). "Oblique random survival forests." \emph{The -Annals of Applied Statistics}, \emph{13}(3). doi:10.1214/19-aoas1261 -\url{https://doi.org/10.1214/19-aoas1261}. +Annals of Applied Statistics}, \emph{13}(3). \item Jaeger BC, Welden S, Lenoir K, Speiser JL, Segar MW, Pandey A, Pajewski NM (2023). "Accelerated and interpretable oblique random survival forests." \emph{Journal of Computational and Graphical Statistics}, 1-16. -doi:10.1080/10618600.2023.2231048 -\url{https://doi.org/10.1080/10618600.2023.2231048}. } } diff --git a/man/orsf_pd_oob.Rd b/man/orsf_pd_oob.Rd index 43615667..3ba68008 100644 --- a/man/orsf_pd_oob.Rd +++ b/man/orsf_pd_oob.Rd @@ -276,47 +276,12 @@ pd_new ## 3: Gentoo Biscoe 3200 42.81649 40.19221 42.55664 46.84035 ## 4: Adelie Dream 3200 40.16219 36.95895 40.34633 43.90681 ## 5: Chinstrap Dream 3200 46.21778 43.53954 45.90929 49.19173 -## 6: Gentoo Dream 3200 42.60465 39.89647 42.63520 46.28769 -## 7: Adelie Torgersen 3200 39.91652 36.80227 39.79806 43.68842 -## 8: Chinstrap Torgersen 3200 44.27807 41.95470 44.40742 46.68848 -## 9: Gentoo Torgersen 3200 42.09510 39.49863 41.80049 45.81833 -## 10: Adelie Biscoe 3550 40.77971 38.04027 40.59561 44.57505 -## 11: Chinstrap Biscoe 3550 45.81304 43.52102 45.73116 48.36366 -## 12: Gentoo Biscoe 3550 43.31233 40.77355 43.03077 47.22936 -## 13: Adelie Dream 3550 40.77741 38.07399 40.78175 44.37273 -## 14: Chinstrap Dream 3550 47.30926 44.80493 46.77540 50.47092 -## 15: Gentoo Dream 3550 43.26955 40.86119 43.16204 46.89190 -## 16: Adelie Torgersen 3550 40.25780 37.35251 40.07871 44.04576 -## 17: Chinstrap Torgersen 3550 44.77911 42.60161 44.81944 47.14986 -## 18: Gentoo Torgersen 3550 42.49520 39.95866 42.14160 46.26237 -## 19: Adelie Biscoe 3975 41.61744 38.94515 41.36634 45.38752 -## 20: Chinstrap Biscoe 3975 46.59363 44.59970 46.44923 49.11457 -## 21: Gentoo Biscoe 3975 44.07857 41.60792 43.74562 47.85109 -## 22: Adelie Dream 3975 41.50511 39.06187 41.24741 45.13027 -## 23: Chinstrap Dream 3975 48.14978 45.87390 47.54867 51.50683 -## 24: Gentoo Dream 3975 44.01928 41.70577 43.84099 47.50470 -## 25: Adelie Torgersen 3975 40.94764 38.12519 40.66759 44.73689 -## 26: Chinstrap Torgersen 3975 45.44820 43.49986 45.44036 47.63243 -## 27: Gentoo Torgersen 3975 43.13791 40.70628 42.70627 46.87306 -## 28: Adelie Biscoe 4700 42.93914 40.48463 42.44768 46.81756 -## 29: Chinstrap Biscoe 4700 47.18534 45.40866 47.07739 49.55747 -## 30: Gentoo Biscoe 4700 45.32541 43.08173 44.93498 49.23391 -## 31: Adelie Dream 4700 42.73806 40.44229 42.22226 46.49936 -## 32: Chinstrap Dream 4700 48.37354 46.34335 48.00781 51.18955 -## 33: Gentoo Dream 4700 45.09132 42.88328 44.79530 48.82180 -## 34: Adelie Torgersen 4700 42.09349 39.72074 41.56168 45.68838 -## 35: Chinstrap Torgersen 4700 46.17045 44.39042 46.09525 48.35127 -## 36: Gentoo Torgersen 4700 44.31621 42.18968 43.81773 47.98024 -## 37: Adelie Biscoe 5300 43.89769 41.43335 43.28504 48.10892 -## 38: Chinstrap Biscoe 5300 47.53721 45.66038 47.52770 49.88701 -## 39: Gentoo Biscoe 5300 46.16115 43.81722 45.59309 50.57469 -## 40: Adelie Dream 5300 43.59846 41.25825 43.24518 47.46193 +## --- ## 41: Chinstrap Dream 5300 48.48139 46.36282 48.25679 51.02996 ## 42: Gentoo Dream 5300 45.91819 43.62832 45.54110 49.91622 ## 43: Adelie Torgersen 5300 42.92879 40.66576 42.31072 46.76406 ## 44: Chinstrap Torgersen 5300 46.59576 44.80400 46.49196 49.03906 ## 45: Gentoo Torgersen 5300 45.11384 42.95190 44.51289 49.27629 -## species island body_mass_g mean lwr medn upr }\if{html}{\out{
}} By default, all combinations of all variables are used. However, you can @@ -419,39 +384,19 @@ Specify \code{pred_horizon} to get partial dependence at each value: pd_train }\if{html}{\out{
}} -\if{html}{\out{
}}\preformatted{## pred_horizon bili mean lwr medn upr -## -## 1: 500 0.55 0.06171990 0.000443399 0.008654190 0.5907104 -## 2: 1000 0.55 0.14185009 0.005793742 0.055728527 0.7360749 -## 3: 1500 0.55 0.20825053 0.013609478 0.091745579 0.8556319 -## 4: 2000 0.55 0.26790167 0.023047689 0.145741690 0.8910549 -## 5: 2500 0.55 0.31796166 0.063797305 0.202544999 0.9017710 -## 6: 3000 0.55 0.39108086 0.090852131 0.301804690 0.9234812 -## 7: 500 0.70 0.06240527 0.000443399 0.008934806 0.5980510 -## 8: 1000 0.70 0.14313570 0.006159694 0.056348007 0.7432448 -## 9: 1500 0.70 0.21012128 0.013717586 0.092461532 0.8597396 -## 10: 2000 0.70 0.27013021 0.023169510 0.146344595 0.8935664 -## 11: 2500 0.70 0.31880954 0.062506113 0.201979102 0.9068170 -## 12: 3000 0.70 0.39286323 0.089707173 0.308392927 0.9252028 -## 13: 500 1.50 0.06679162 0.001271788 0.011028398 0.6241228 -## 14: 1000 1.50 0.15727919 0.011478962 0.068332010 0.7678732 -## 15: 1500 1.50 0.23316655 0.028732095 0.117289745 0.8789647 -## 16: 2000 1.50 0.30139227 0.046792721 0.180096425 0.9144202 -## 17: 2500 1.50 0.35260943 0.084586675 0.238015966 0.9266065 -## 18: 3000 1.50 0.43512074 0.131110330 0.346025144 0.9438562 -## 19: 500 3.50 0.08638646 0.005208753 0.028239001 0.6740930 -## 20: 1000 3.50 0.22353655 0.051917978 0.139604845 0.8283986 -## 21: 1500 3.50 0.32700976 0.090198324 0.217982772 0.9371150 -## 22: 2000 3.50 0.41618105 0.144532860 0.311508093 0.9566091 -## 23: 2500 3.50 0.49248461 0.219511094 0.402095677 0.9636221 -## 24: 3000 3.50 0.56008108 0.263569896 0.503253258 0.9734948 -## 25: 500 7.25 0.12585007 0.022092057 0.063550987 0.7543806 -## 26: 1000 7.25 0.32646274 0.135343689 0.259567907 0.8884333 -## 27: 1500 7.25 0.46412653 0.218208755 0.387874346 0.9702903 -## 28: 2000 7.25 0.55117610 0.293367409 0.484277295 0.9812413 -## 29: 2500 7.25 0.62002385 0.371965247 0.569543990 0.9845058 -## 30: 3000 7.25 0.68034820 0.425128031 0.646423180 0.9888637 -## pred_horizon bili mean lwr medn upr +\if{html}{\out{
}}\preformatted{## pred_horizon bili mean lwr medn upr +## +## 1: 500 0.55 0.0617199 0.000443399 0.00865419 0.5907104 +## 2: 1000 0.55 0.1418501 0.005793742 0.05572853 0.7360749 +## 3: 1500 0.55 0.2082505 0.013609478 0.09174558 0.8556319 +## 4: 2000 0.55 0.2679017 0.023047689 0.14574169 0.8910549 +## 5: 2500 0.55 0.3179617 0.063797305 0.20254500 0.9017710 +## --- +## 26: 1000 7.25 0.3264627 0.135343689 0.25956791 0.8884333 +## 27: 1500 7.25 0.4641265 0.218208755 0.38787435 0.9702903 +## 28: 2000 7.25 0.5511761 0.293367409 0.48427730 0.9812413 +## 29: 2500 7.25 0.6200238 0.371965247 0.56954399 0.9845058 +## 30: 3000 7.25 0.6803482 0.425128031 0.64642318 0.9888637 }\if{html}{\out{
}} vector-valued \code{pred_horizon} input comes with minimal extra diff --git a/man/orsf_vi.Rd b/man/orsf_vi.Rd index 16ecf027..a0f5afb1 100644 --- a/man/orsf_vi.Rd +++ b/man/orsf_vi.Rd @@ -347,8 +347,7 @@ orsf_vi_permute(fit_custom) \item Harrell, E F, Califf, M R, Pryor, B D, Lee, L K, Rosati, A R (1982). "Evaluating the yield of medical tests." \emph{Jama}, \emph{247}(18), 2543-2546. \item Breiman, Leo (2001). "Random Forests." \emph{Machine Learning}, \emph{45}(1), -5-32. ISSN 1573-0565, doi:10.1023/A:1010933404324 -\url{https://doi.org/10.1023/A\%3A1010933404324}. +5-32. ISSN 1573-0565. \item Menze, H B, Kelm, Michael B, Splitthoff, N D, Koethe, Ullrich, Hamprecht, A F (2011). "On oblique random forests." In \emph{Machine Learning and Knowledge Discovery in Databases: European Conference, @@ -357,7 +356,5 @@ II 22}, 453-469. Springer. \item Jaeger BC, Welden S, Lenoir K, Speiser JL, Segar MW, Pandey A, Pajewski NM (2023). "Accelerated and interpretable oblique random survival forests." \emph{Journal of Computational and Graphical Statistics}, 1-16. -doi:10.1080/10618600.2023.2231048 -\url{https://doi.org/10.1080/10618600.2023.2231048}. } }