diff --git a/404.html b/404.html index 14d0c6be..a6c59578 100644 --- a/404.html +++ b/404.html @@ -31,7 +31,7 @@ aorsf - 0.1.0 + 0.1.1
oobag_fun_brier(y_mat = pbc_orsf[,c('time', 'status')],
s_vec = fit$pred_oobag)
-#> [1] 0.113472
Second, you can pass your function into orsf(), and it
will be used in place of Harrell’s C-statistic:
@@ -327,12 +327,12 @@User-supplied function importance = 'negate') fit_tdep_cstat$importance -#> bili copper stage sex albumin age -#> 0.11431938 0.04283192 0.03011800 0.02978767 0.02434698 0.02379080 -#> protime chol ascites ast spiders edema -#> 0.02289864 0.01823123 0.01264641 0.00986446 0.00913007 0.00769056 -#> hepato trig alk.phos trt platelet -#> 0.00636780 0.00619816 0.00258887 0.00046192 -0.00201958
The AUC values, from highest to lowest:
sc$AUC$score[order(-AUC)]
## model times AUC se lower upper
-## 1: net 1788 0.9179396 0.02012887 0.8784877 0.9573915
-## 2: accel 1788 0.9106396 0.02076004 0.8699507 0.9513286
-## 3: cph 1788 0.9061167 0.02277540 0.8614777 0.9507556
-## 4: rlt 1788 0.9012605 0.02178982 0.8585533 0.9439678
-## 5: rando 1788 0.8997729 0.02201363 0.8566270 0.9429188
-## 6: pca 1788 0.8996927 0.02245483 0.8556821 0.9437034
And the indices of prediction accuracy:
sc$Brier$score[order(-IPA), .(model, times, IPA)]
## model times IPA
-## 1: net 1788 0.5020652
-## 2: cph 1788 0.4759061
-## 3: accel 1788 0.4743392
-## 4: pca 1788 0.4398468
-## 5: rlt 1788 0.4373910
-## 6: rando 1788 0.4219209
+## 1: net 1788 0.4916815
+## 2: cph 1788 0.4833913
+## 3: accel 1788 0.4749974
+## 4: rlt 1788 0.4630984
+## 5: pca 1788 0.4371223
+## 6: rando 1788 0.4258456
## 7: Null model 1788 0.0000000
From inspection, net, accel, and rlt have high discrimination and index of
prediction accuracy.
## Rows: 276
## Columns: 23
-## $ id <int> 16, 29, 43, 62, 79, 82, 103, 105, 111, 114, 115, 139, 141,~
-## $ trt <fct> placebo, placebo, d_penicill_main, placebo, d_penicill_mai~
-## $ age <dbl> 40.44353, 63.87680, 48.87064, 60.70637, 46.51608, 67.31006~
-## $ sex <fct> f, f, f, f, f, f, f, f, f, m, f, f, f, f, f, f, f, f, f, f~
-## $ ascites <fct> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0~
-## $ hepato <fct> 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1~
-## $ spiders <fct> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1~
-## $ edema <fct> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
-## $ bili <dbl> 0.7, 0.7, 1.1, 1.3, 0.8, 4.5, 2.5, 1.1, 5.5, 3.2, 0.7, 1.1~
-## $ chol <int> 204, 370, 361, 302, 315, 472, 188, 464, 528, 259, 303, 328~
-## $ albumin <dbl> 3.66, 3.78, 3.64, 2.75, 4.24, 4.09, 3.67, 4.20, 4.18, 4.30~
-## $ copper <int> 28, 24, 36, 58, 13, 154, 57, 38, 77, 208, 81, 159, 59, 76,~
-## $ alk.phos <dbl> 685.0, 5833.0, 5430.2, 1523.0, 1637.0, 1580.0, 1273.0, 164~
-## $ ast <dbl> 72.85, 73.53, 67.08, 43.40, 170.50, 117.80, 119.35, 151.90~
-## $ trig <int> 58, 86, 89, 112, 70, 272, 102, 102, 78, 78, 156, 134, 56, ~
-## $ platelet <int> 198, 390, 203, 329, 426, 412, 110, 348, 467, 268, 307, 142~
-## $ protime <dbl> 10.8, 10.6, 10.6, 13.2, 10.9, 11.1, 11.1, 10.3, 10.7, 11.7~
-## $ stage <ord> 3, 2, 2, 4, 3, 3, 4, 3, 3, 3, 3, 4, 2, 2, 3, 4, 2, 3, 4, 4~
-## $ time <int> 3672, 4509, 4556, 3090, 3707, 3574, 110, 3092, 2350, 3395,~
-## $ status <dbl> 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0~
-## $ pred_aorsf <dbl> 0.02210163, 0.12510110, 0.07571520, 0.59580668, 0.12839078~
-## $ pred_rfsrc <dbl> 0.01861595, 0.15632904, 0.07635485, 0.62281617, 0.19145913~
-## $ pred_ranger <dbl> 0.02143363, 0.13367920, 0.05892584, 0.54481330, 0.21380654~
And finish by aggregating the predictions and computing performance in the testing data. Note that I am computing one statistic for all predictions instead of computing one statistic for each fold. This @@ -742,16 +742,16 @@
library(riskRegression)
-library(survival)
+
+## riskRegression version 2023.09.08
+library(survival)
risk_preds <- list(rando = 1 - fit_rando$pred_oobag,
pca = 1 - fit_pca$pred_oobag)
@@ -176,15 +177,15 @@ Evaluate##
## model times Brier lower upper IPA
## 1: Null model 1788 20.479 18.090 22.868 0.000
-## 2: rando 1788 11.604 9.535 13.673 43.339
-## 3: pca 1788 12.870 10.872 14.869 37.154
+## 2: rando 1788 11.809 9.727 13.890 42.338
+## 3: pca 1788 12.967 10.983 14.950 36.683
##
## Results of model comparisons:
##
## times model reference delta.Brier lower upper p
-## 1: 1788 rando Null model -8.875 -11.063 -6.688 1.852437e-15
-## 2: 1788 pca Null model -7.609 -9.351 -5.866 1.143284e-17
-## 3: 1788 pca rando 1.267 0.449 2.084 2.381056e-03
+## 1: 1788 rando Null model -8.670 -10.843 -6.498 5.218847e-15
+## 2: 1788 pca Null model -7.512 -9.183 -5.842 1.226512e-18
+## 3: 1788 pca rando 1.158 0.305 2.011 7.810716e-03
##
## NOTE: Values are multiplied by 100 and given in %.
diff --git a/reference/orsf_control_fast.html b/reference/orsf_control_fast.html
index 41269cc8..6ac3a69a 100644
--- a/reference/orsf_control_fast.html
+++ b/reference/orsf_control_fast.html
@@ -10,7 +10,7 @@
aorsf
- 0.1.0
+ 0.1.1
diff --git a/reference/orsf_control_net.html b/reference/orsf_control_net.html
index c84614a4..6601e007 100644
--- a/reference/orsf_control_net.html
+++ b/reference/orsf_control_net.html
@@ -10,7 +10,7 @@
aorsf
- 0.1.0
+ 0.1.1
@@ -126,7 +126,7 @@ Examples#> N trees: 25
#> N predictors total: 17
#> N predictors per node: 5
-#> Average leaves per tree: 24
+#> Average leaves per tree: 26
#> Min observations in leaf: 5
#> Min events in leaf: 1
#> OOB stat value: 0.82
diff --git a/reference/orsf_ice_oob.html b/reference/orsf_ice_oob.html
index 2d698381..fdb05eee 100644
--- a/reference/orsf_ice_oob.html
+++ b/reference/orsf_ice_oob.html
@@ -26,7 +26,7 @@
aorsf
- 0.1.0
+ 0.1.1
@@ -214,7 +214,7 @@ Examples## N trees: 500
## N predictors total: 17
## N predictors per node: 5
-## Average leaves per tree: 25
+## Average leaves per tree: 21
## Min observations in leaf: 5
## Min events in leaf: 1
## OOB stat value: 0.84
@@ -229,17 +229,17 @@ Examples
ice_oob
## id_variable id_row pred_horizon bili pred
-## 1: 1 1 1788 1 0.9295584
-## 2: 1 2 1788 1 0.1422392
-## 3: 1 3 1788 1 0.7047846
-## 4: 1 4 1788 1 0.3845760
-## 5: 1 5 1788 1 0.1206201
+## 1: 1 1 1788 1 0.8976716
+## 2: 1 2 1788 1 0.1202763
+## 3: 1 3 1788 1 0.6842180
+## 4: 1 4 1788 1 0.3865812
+## 5: 1 5 1788 1 0.1184953
## ---
-## 6896: 25 272 1788 10 0.3878561
-## 6897: 25 273 1788 10 0.4854526
-## 6898: 25 274 1788 10 0.4389557
-## 6899: 25 275 1788 10 0.3639220
-## 6900: 25 276 1788 10 0.5461205
+## 6896: 25 272 1788 10 0.3421749
+## 6897: 25 273 1788 10 0.4296413
+## 6898: 25 274 1788 10 0.4496536
+## 6899: 25 275 1788 10 0.3186596
+## 6900: 25 276 1788 10 0.5490316
Much more detailed examples are given in the vignette
diff --git a/reference/orsf_pd_oob.html b/reference/orsf_pd_oob.html index 005a4f8f..2d475106 100644 --- a/reference/orsf_pd_oob.html +++ b/reference/orsf_pd_oob.html @@ -26,7 +26,7 @@ aorsf - 0.1.0 + 0.1.1pd_train <- orsf_pd_inb(fit, pred_spec = list(bili = 1:5))
pd_train
## pred_horizon bili mean lwr medn upr
-## 1: 1826.25 1 0.2188047 0.01435497 0.09604722 0.8243506
-## 2: 1826.25 2 0.2540831 0.03086042 0.13766124 0.8442959
-## 3: 1826.25 3 0.2982917 0.05324065 0.19470910 0.8578131
-## 4: 1826.25 4 0.3536969 0.09755193 0.27774884 0.8699063
-## 5: 1826.25 5 0.3955249 0.14622431 0.29945708 0.8775099
## pred_horizon bili mean lwr medn upr
+## 1: 1826.25 1 0.2046395 0.02119497 0.1038427 0.7755589
+## 2: 1826.25 2 0.2372342 0.03380476 0.1307957 0.8017817
+## 3: 1826.25 3 0.2785774 0.05468892 0.1828047 0.8173042
+## 4: 1826.25 4 0.3286266 0.09236600 0.2433536 0.8357596
+## 5: 1826.25 5 0.3641739 0.12598059 0.2811453 0.8390030
using out-of-bag predictions for the training data
pd_train <- orsf_pd_oob(fit, pred_spec = list(bili = 1:5))
pd_train
## pred_horizon bili mean lwr medn upr
-## 1: 1826.25 1 0.2182691 0.01218789 0.1008030 0.8304537
-## 2: 1826.25 2 0.2542021 0.02447359 0.1453580 0.8484741
-## 3: 1826.25 3 0.2980946 0.04854875 0.1997769 0.8640601
-## 4: 1826.25 4 0.3552203 0.10116417 0.2691853 0.8642393
-## 5: 1826.25 5 0.3959143 0.14768055 0.3264149 0.8737186
using predictions for a new set of data
pd_test <- orsf_pd_new(fit,
new_data = pbc_orsf_test,
@@ -267,11 +267,11 @@ Three ways to compute PD and ICE
pd_test
## pred_horizon bili mean lwr medn upr
-## 1: 1826.25 1 0.2643662 0.01758300 0.2098936 0.8410357
-## 2: 1826.25 2 0.2990578 0.04063388 0.2516202 0.8553218
-## 3: 1826.25 3 0.3432503 0.06843859 0.3056799 0.8670726
-## 4: 1826.25 4 0.3968111 0.11801725 0.3593064 0.8725208
-## 5: 1826.25 5 0.4388962 0.16038177 0.4094224 0.8809027
in-bag partial dependence indicates relationships that the model has learned during training. This is helpful if your goal is to interpret the model.
attr(fit, 'importance_values')
-## ascites_1 edema_1 bili copper albumin age
-## 0.44989185 0.43936093 0.29908016 0.22471022 0.20573664 0.19373368
-## edema_0.5 protime chol stage spiders_1 ast
-## 0.19096711 0.17582704 0.17527675 0.17057992 0.16721527 0.16061635
-## sex_f hepato_1 trig alk.phos platelet trt_placebo
-## 0.14513788 0.14241390 0.12695468 0.12228332 0.10395510 0.09001406
## edema_1 ascites_1 bili copper albumin age
+## 0.53189300 0.49950642 0.39598881 0.30443254 0.26028060 0.24758399
+## protime stage chol edema_0.5 spiders_1 ast
+## 0.22874192 0.20974576 0.20353982 0.18401760 0.18090452 0.17457962
+## hepato_1 sex_f trig alk.phos platelet trt_placebo
+## 0.16402406 0.14803440 0.13009809 0.11627907 0.07853659 0.06939410
these are ‘raw’ because values for factors have not been aggregated into a single value. Currently there is one value for k-1 levels of a k-level factor. For example, you can see edema_1 and edema_0.5 in the importance @@ -234,19 +234,19 @@
access the importance
element from the orsf
fit:
fit$importance
## ascites bili edema copper albumin age protime
-## 0.44989185 0.29908016 0.29150746 0.22471022 0.20573664 0.19373368 0.17582704
-## chol stage spiders ast sex hepato trig
-## 0.17527675 0.17057992 0.16721527 0.16061635 0.14513788 0.14241390 0.12695468
+## 0.49950642 0.39598881 0.32482431 0.30443254 0.26028060 0.24758399 0.22874192
+## stage chol spiders ast hepato sex trig
+## 0.20974576 0.20353982 0.18090452 0.17457962 0.16402406 0.14803440 0.13009809
## alk.phos platelet trt
-## 0.12228332 0.10395510 0.09001406
use orsf_vi()
with group_factors set to TRUE
(the default)
orsf_vi(fit)
## ascites bili edema copper albumin age protime
-## 0.44989185 0.29908016 0.29150746 0.22471022 0.20573664 0.19373368 0.17582704
-## chol stage spiders ast sex hepato trig
-## 0.17527675 0.17057992 0.16721527 0.16061635 0.14513788 0.14241390 0.12695468
+## 0.49950642 0.39598881 0.32482431 0.30443254 0.26028060 0.24758399 0.22874192
+## stage chol spiders ast hepato sex trig
+## 0.20974576 0.20353982 0.18090452 0.17457962 0.16402406 0.14803440 0.13009809
## alk.phos platelet trt
-## 0.12228332 0.10395510 0.09001406
Note that you can make the default returned importance values ungrouped
by setting group_factors to FALSE in the orsf_vi functions or the orsf function.
## bili copper sex stage protime age
-## 0.117833946 0.046771025 0.038096005 0.026596235 0.023892153 0.022568331
-## albumin ascites chol ast edema hepato
-## 0.020502226 0.015764542 0.013505575 0.011507061 0.007444267 0.007318432
-## trt spiders alk.phos trig platelet
-## 0.006135388 0.005416366 0.003385460 0.003359579 0.001225734
## bili copper sex protime stage albumin
+## 0.118355612 0.048917049 0.037068840 0.027044335 0.023867241 0.021214168
+## age ascites chol ast hepato edema
+## 0.020517824 0.014993236 0.014726515 0.011441749 0.007711157 0.007218808
+## spiders trig alk.phos trt platelet
+## 0.006372905 0.003230269 0.002823511 0.002469395 0.001550349
orsf_vi_permute(fit_no_vi)
## bili copper age protime albumin
-## 0.0557854459 0.0230058852 0.0142318894 0.0139189306 0.0138242166
-## ascites stage chol ast edema
-## 0.0122576604 0.0122514140 0.0062628391 0.0060073065 0.0057933534
+## bili copper protime albumin ascites
+## 0.0546201463 0.0248826626 0.0154623867 0.0135573758 0.0134222183
+## age stage chol edema ast
+## 0.0119054385 0.0113940805 0.0074991392 0.0052943907 0.0051219919
## hepato spiders sex trig alk.phos
-## 0.0052890246 0.0038620727 0.0031610738 0.0014580912 0.0009063636
+## 0.0050381864 0.0046277553 0.0039401072 0.0024125340 0.0009602607
## platelet trt
-## 0.0001124081 -0.0017971380
+## 0.0004343594 -0.0018248238
## bili copper age ascites protime
-## 0.0537706105 0.0232845222 0.0135823364 0.0127916446 0.0125320108
-## albumin stage ast edema hepato
-## 0.0115100144 0.0109035858 0.0063943212 0.0062769135 0.0048230621
-## chol spiders sex trig alk.phos
-## 0.0042752565 0.0030699653 0.0025422803 0.0022410492 0.0010977282
+## bili copper age ascites albumin
+## 5.352210e-02 2.610549e-02 1.286639e-02 1.251888e-02 1.205836e-02
+## protime stage ast chol edema
+## 1.084665e-02 1.057182e-02 8.228770e-03 6.002428e-03 5.834663e-03
+## spiders hepato sex trig alk.phos
+## 4.760070e-03 3.437825e-03 3.388559e-03 2.274705e-03 2.226034e-03
## platelet trt
-## 0.0010972387 -0.0005947093
+## 1.424997e-03 -4.992912e-06
You can still get negation VI from this fit, but it needs to be computed
orsf_vi_negate(fit_permute_vi)
## bili copper sex age protime stage
-## 0.120854614 0.046515980 0.036380485 0.022668834 0.021816803 0.021111101
-## albumin ascites ast chol edema spiders
-## 0.018969867 0.014101778 0.013042103 0.011220170 0.008009693 0.006193354
-## trt hepato trig alk.phos platelet
-## 0.005184060 0.005113622 0.003389060 0.003156121 0.002242597
## bili copper sex stage protime albumin
+## 0.124726411 0.052319578 0.038681018 0.027479231 0.022737472 0.022214271
+## age ast ascites chol spiders edema
+## 0.020375826 0.013404081 0.013208974 0.011856865 0.008990815 0.007340934
+## hepato trt trig alk.phos platelet
+## 0.006432032 0.004392664 0.004237792 0.003229450 0.002576929
## [,1] [,2] [,3]
-## [1,] 0.49884105 0.77681319 0.91901860
-## [2,] 0.04475471 0.09161544 0.17682278
-## [3,] 0.12850458 0.27603519 0.41455070
-## [4,] 0.01279086 0.02980402 0.06458151
-## [5,] 0.01277317 0.02249769 0.04875677
## [,1] [,2] [,3]
+## [1,] 0.459077419 0.73067673 0.89246351
+## [2,] 0.032194868 0.08028381 0.15592011
+## [3,] 0.115945485 0.24099853 0.38094684
+## [4,] 0.008378033 0.02964250 0.06977315
+## [5,] 0.009798295 0.01793586 0.04454374
# predicted survival, i.e., 1 - risk
predict(fit,
new_data = pbc_orsf_test[1:5, ],
pred_type = 'surv',
pred_horizon = c(500, 1000, 1500))
## [,1] [,2] [,3]
-## [1,] 0.5011589 0.2231868 0.0809814
-## [2,] 0.9552453 0.9083846 0.8231772
-## [3,] 0.8714954 0.7239648 0.5854493
-## [4,] 0.9872091 0.9701960 0.9354185
-## [5,] 0.9872268 0.9775023 0.9512432
# predicted cumulative hazard function
# (expected number of events for person i at time j)
predict(fit,
@@ -214,11 +214,11 @@ Examples pred_type = 'chf',
pred_horizon = c(500, 1000, 1500))
## [,1] [,2] [,3]
-## [1,] 0.70860748 1.40641948 1.79893071
-## [2,] 0.04954335 0.11460828 0.24130253
-## [3,] 0.16616222 0.43287394 0.71524591
-## [4,] 0.01443848 0.03640393 0.08366798
-## [5,] 0.01435412 0.02680792 0.06203327
Predict mortality, defined as the number of events in the forest’s population if all observations had characteristics like the current observation. This type of prediction does not require you to specify a @@ -226,12 +226,12 @@
predict(fit,
new_data = pbc_orsf_test[1:5, ],
pred_type = 'mort')
## [,1]
-## [1,] 81.23490
-## [2,] 27.69730
-## [3,] 41.52408
-## [4,] 15.79522
-## [5,] 10.65239
## [,1]
+## [1,] 78.646185
+## [2,] 20.872849
+## [3,] 37.341745
+## [4,] 13.616617
+## [5,] 8.798328