fit <- orsf(data = pbc_orsf,
formula = Surv(time, status) ~ . - id,
@@ -182,7 +183,7 @@ User-supplied out-of-bag
In some cases, you may want more control over how out-of-bag error is
estimated. For example, let’s use the Brier score from the SurvMetrics
package:
-
+
oobag_brier_surv <- function(y_mat, w_vec, s_vec){
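  # Full body of this evaluation function, following the article's own
  # definition. y_mat is a two-column matrix ('time', 'status'), w_vec
  # carries case weights (unused here), and s_vec holds predicted survival
  # probabilities at the prediction horizon.

  # use the Brier score from SurvMetrics if the package is available
  if(requireNamespace("SurvMetrics")){
    return(
      # output is a numeric vector of length 1
      as.numeric(
        SurvMetrics::Brier(
          object = Surv(time = y_mat[, 1], event = y_mat[, 2]),
          pre_sp = s_vec,
          # t_star in Brier() should match oobag_pred_horizon in orsf()
          t_star = 2000
        )
      )
    )
  }

  # fallback if SurvMetrics is not installed: a simple squared-error version
  mean( (y_mat[, 2] - (1 - s_vec))^2 )

}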
@@ -212,7 +213,7 @@ User-supplied out-of-bag
There are two ways to apply your own function to compute out-of-bag
error. First, you can apply your function to the out-of-bag survival
predictions that are stored in ‘aorsf’ objects, e.g.:
-
+
oobag_brier_surv(y_mat = pbc_orsf[,c('time', 'status')],
s_vec = fit$pred_oobag)
@@ -220,7 +221,7 @@ User-supplied out-of-bag
#> [1] 0.11869
Second, you can pass your function into orsf(), and it
will be used in place of Harrell’s C-statistic:
-
+
# instead of copy/pasting the modeling code and then modifying it,
# you can just use orsf_update.
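# A minimal sketch of that refit, following the article's example
# (assumes `fit` and `oobag_brier_surv` are defined as above):
fit_brier <- orsf_update(fit, oobag_fun = oobag_brier_surv)

# the out-of-bag statistic is now the Brier score rather than the C-index
fit_brier$eval_oobag$stat_values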
diff --git a/articles/pd.html b/articles/pd.html
index 604e6dc4..c4ff680a 100644
--- a/articles/pd.html
+++ b/articles/pd.html
@@ -35,7 +35,7 @@
aorsf
- 0.1.4.9001
+ 0.1.5
diff --git a/reference/orsf_update.html b/reference/orsf_update.html
index cde91f9b..d248149f 100644
--- a/reference/orsf_update.html
+++ b/reference/orsf_update.html
@@ -10,7 +10,7 @@
aorsf
- 0.1.4.9001
+ 0.1.5
diff --git a/reference/orsf_vi.html b/reference/orsf_vi.html
index dc28dc69..4c703887 100644
--- a/reference/orsf_vi.html
+++ b/reference/orsf_vi.html
@@ -12,7 +12,7 @@
aorsf
- 0.1.4.9001
+ 0.1.5
diff --git a/reference/orsf_vint.html b/reference/orsf_vint.html
index 8e379481..b4c5f0df 100644
--- a/reference/orsf_vint.html
+++ b/reference/orsf_vint.html
@@ -14,7 +14,7 @@
aorsf
- 0.1.4.9001
+ 0.1.5
diff --git a/reference/orsf_vs.html b/reference/orsf_vs.html
index 660d33f6..6e342d47 100644
--- a/reference/orsf_vs.html
+++ b/reference/orsf_vs.html
@@ -10,7 +10,7 @@
aorsf
- 0.1.4.9001
+ 0.1.5
diff --git a/reference/pbc_orsf.html b/reference/pbc_orsf.html
index b5dead67..84928c0b 100644
--- a/reference/pbc_orsf.html
+++ b/reference/pbc_orsf.html
@@ -12,7 +12,7 @@
aorsf
- 0.1.4.9001
+ 0.1.5
diff --git a/reference/penguins_orsf.html b/reference/penguins_orsf.html
index 14e7f541..93877cae 100644
--- a/reference/penguins_orsf.html
+++ b/reference/penguins_orsf.html
@@ -20,7 +20,7 @@
aorsf
- 0.1.4.9001
+ 0.1.5
diff --git a/reference/pred_spec_auto.html b/reference/pred_spec_auto.html
index a90e3558..a6ed4383 100644
--- a/reference/pred_spec_auto.html
+++ b/reference/pred_spec_auto.html
@@ -18,7 +18,7 @@
aorsf
- 0.1.4.9001
+ 0.1.5
diff --git a/reference/predict.ObliqueForest.html b/reference/predict.ObliqueForest.html
index 0040973c..53904b62 100644
--- a/reference/predict.ObliqueForest.html
+++ b/reference/predict.ObliqueForest.html
@@ -14,7 +14,7 @@
aorsf
- 0.1.4.9001
+ 0.1.5
diff --git a/reference/print.ObliqueForest.html b/reference/print.ObliqueForest.html
index 6934b4e7..a192e3a7 100644
--- a/reference/print.ObliqueForest.html
+++ b/reference/print.ObliqueForest.html
@@ -38,7 +38,7 @@
aorsf
- 0.1.4.9001
+ 0.1.5
diff --git a/reference/print.orsf_summary_uni.html b/reference/print.orsf_summary_uni.html
index 4cee11bc..8f7d7a22 100644
--- a/reference/print.orsf_summary_uni.html
+++ b/reference/print.orsf_summary_uni.html
@@ -10,7 +10,7 @@
aorsf
- 0.1.4.9001
+ 0.1.5
diff --git a/search.json b/search.json
index afee3c38..ab934912 100644
--- a/search.json
+++ b/search.json
@@ -1 +1 @@
-[{"path":"https://bcjaeger.github.io/aorsf/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to aorsf","title":"Contributing to aorsf","text":"Want contribute aorsf? Great! aorsf initially stable state development, great deal active subsequent development envisioned. outline propose change aorsf. detailed info contributing , tidyverse packages, please see development contributing guide.","code":""},{"path":"https://bcjaeger.github.io/aorsf/CONTRIBUTING.html","id":"fixing-typos","dir":"","previous_headings":"","what":"Fixing typos","title":"Contributing to aorsf","text":"can fix typos, spelling mistakes, grammatical errors documentation directly using GitHub web interface, long changes made source file. generally means ’ll need edit roxygen2 comments .R, .Rd file. can find .R file generates .Rd reading comment first line.","code":""},{"path":"https://bcjaeger.github.io/aorsf/CONTRIBUTING.html","id":"bigger-changes","dir":"","previous_headings":"","what":"Bigger changes","title":"Contributing to aorsf","text":"want make bigger change, ’s good idea first file issue make sure someone team agrees ’s needed. ’ve found bug, please file issue illustrates bug minimal reprex (also help write unit test, needed).","code":""},{"path":"https://bcjaeger.github.io/aorsf/CONTRIBUTING.html","id":"pull-request-process","dir":"","previous_headings":"Bigger changes","what":"Pull request process","title":"Contributing to aorsf","text":"Fork package clone onto computer. haven’t done , recommend using usethis::create_from_github(\"ropensci/aorsf\", fork = TRUE). Install development dependencies devtools::install_dev_deps(), make sure package passes R CMD check running devtools::check(). R CMD check doesn’t pass cleanly, ’s good idea ask help continuing. Create Git branch pull request (PR). recommend using usethis::pr_init(\"brief-description--change\"). Make changes, commit git, create PR running usethis::pr_push(), following prompts browser. title PR briefly describe change. body PR contain Fixes #issue-number. user-facing changes, add bullet top NEWS.md (.e. just first header). Follow style described https://style.tidyverse.org/news.html.","code":""},{"path":"https://bcjaeger.github.io/aorsf/CONTRIBUTING.html","id":"code-style","dir":"","previous_headings":"Bigger changes","what":"Code style","title":"Contributing to aorsf","text":"New code follow tidyverse style guide. can use styler package apply styles, please don’t restyle code nothing PR. use roxygen2, Markdown syntax, documentation. use testthat unit tests. Contributions test cases included easier accept.","code":""},{"path":"https://bcjaeger.github.io/aorsf/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Contributing to aorsf","text":"Please note aorsf project released Contributor Code Conduct. contributing project agree abide terms.","code":""},{"path":"https://bcjaeger.github.io/aorsf/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2022 aorsf authors (Byron C. Jaeger, Sawyer Welden, Nicholas M. 
Pajewski) Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://bcjaeger.github.io/aorsf/articles/aorsf.html","id":"background","dir":"Articles","previous_headings":"","what":"Background","title":"Introduction to aorsf","text":"oblique random forest (RF) extension traditional (axis-based) RF. Instead using single variable split data grow new branches, trees oblique RF use weighted combination multiple variables.","code":""},{"path":"https://bcjaeger.github.io/aorsf/articles/aorsf.html","id":"oblique-rfs-for-survival-classification-and-regression","dir":"Articles","previous_headings":"","what":"Oblique RFs for survival, classification, and regression","title":"Introduction to aorsf","text":"purpose aorsf (‘’ short accelerated) provide unifying framework fit oblique RFs can scale adequately large data sets. fastest algorithms available package used default often equivalent prediction accuracy computational approaches. center piece aorsf orsf() function. initial versions aorsf, orsf() function fit oblique random survival forests, now allows classification, regression, survival forests. (may introduce orf() function future name orsf() misleading users.) classification, fit oblique RF predict penguin species using penguin data magnificent palmerpenguins R package regression, use data predict bill length penguins: personal favorite oblique survival RF accelerated Cox regression great combination prediction accuracy computational efficiency (see JCGS paper). , predict mortality risk following diagnosis primary biliary cirrhosis: may notice first input aorsf data. design choice makes easier use orsf pipes (.e., %>% |>). instance,","code":"# An oblique classification RF penguin_fit <- orsf(data = penguins_orsf, formula = species ~ .) penguin_fit #> ---------- Oblique random classification forest #> #> Linear combinations: Accelerated Logistic regression #> N observations: 333 #> N classes: 3 #> N trees: 500 #> N predictors total: 7 #> N predictors per node: 3 #> Average leaves per tree: 5.542 #> Min observations in leaf: 5 #> OOB stat value: 1.00 #> OOB stat type: AUC-ROC #> Variable importance: anova #> #> ----------------------------------------- # An oblique regression RF bill_fit <- orsf(data = penguins_orsf, formula = bill_length_mm ~ .) bill_fit #> ---------- Oblique random regression forest #> #> Linear combinations: Accelerated Linear regression #> N observations: 333 #> N trees: 500 #> N predictors total: 7 #> N predictors per node: 3 #> Average leaves per tree: 49.958 #> Min observations in leaf: 5 #> OOB stat value: 0.81 #> OOB stat type: RSQ #> Variable importance: anova #> #> ----------------------------------------- # An oblique survival RF pbc_fit <- orsf(data = pbc_orsf, n_tree = 5, formula = Surv(time, status) ~ . 
- id) pbc_fit #> ---------- Oblique random survival forest #> #> Linear combinations: Accelerated Cox regression #> N observations: 276 #> N events: 111 #> N trees: 5 #> N predictors total: 17 #> N predictors per node: 5 #> Average leaves per tree: 21.6 #> Min observations in leaf: 5 #> Min events in leaf: 1 #> OOB stat value: 0.77 #> OOB stat type: Harrell's C-index #> Variable importance: anova #> #> ----------------------------------------- library(dplyr) pbc_fit <- pbc_orsf |> select(-id) |> orsf(formula = Surv(time, status) ~ ., n_tree = 5)"},{"path":"https://bcjaeger.github.io/aorsf/articles/aorsf.html","id":"interpretation","dir":"Articles","previous_headings":"","what":"Interpretation","title":"Introduction to aorsf","text":"aorsf includes several functions dedicated interpretation ORSFs, estimation partial dependence variable importance.","code":""},{"path":"https://bcjaeger.github.io/aorsf/articles/aorsf.html","id":"variable-importance","dir":"Articles","previous_headings":"Interpretation","what":"Variable importance","title":"Introduction to aorsf","text":"multiple methods compute variable importance, can applied type oblique forest. compute negation importance, ORSF multiplies coefficient variable -1 re-computes --sample (sometimes referred --bag) accuracy ORSF model. can also compute variable importance using permutation, classical approach noises predictor assigned resulting degradation prediction accuracy importance predictor. faster alternative permutation negation importance ANOVA importance, computes proportion times variable obtains low p-value (p < 0.01) forest grown.","code":"orsf_vi_negate(pbc_fit) #> bili age copper ast sex #> 0.1468851774 0.0606952129 0.0246435580 0.0224269123 0.0175587328 #> trig alk.phos protime edema chol #> 0.0096895007 0.0093198869 0.0086039712 0.0006382134 -0.0015687436 #> ascites platelet hepato spiders trt #> -0.0060269468 -0.0102280228 -0.0108549805 -0.0113883544 -0.0201827916 #> stage albumin #> -0.0221462608 -0.0224072750 orsf_vi_permute(penguin_fit) #> bill_length_mm flipper_length_mm bill_depth_mm island #> 0.1724983056 0.1024126291 0.0751508005 0.0676077927 #> body_mass_g sex year #> 0.0626576714 0.0186787401 0.0009286133 orsf_vi_anova(bill_fit) #> species sex island flipper_length_mm #> 0.34861430 0.21055730 0.11626929 0.08843136 #> body_mass_g bill_depth_mm year #> 0.07642887 0.06077348 0.01475293"},{"path":"https://bcjaeger.github.io/aorsf/articles/aorsf.html","id":"partial-dependence-pd","dir":"Articles","previous_headings":"Interpretation","what":"Partial dependence (PD)","title":"Introduction to aorsf","text":"Partial dependence (PD) shows expected prediction model function single predictor multiple predictors. expectation marginalized values predictors, giving something like multivariable adjusted estimate model’s prediction. PD, see vignette","code":""},{"path":"https://bcjaeger.github.io/aorsf/articles/aorsf.html","id":"individual-conditional-expectations-ice","dir":"Articles","previous_headings":"Interpretation","what":"Individual conditional expectations (ICE)","title":"Introduction to aorsf","text":"Unlike partial dependence, shows expected prediction function one multiple predictors, individual conditional expectations (ICE) show prediction individual observation function predictor. 
ICE, see vignette","code":""},{"path":"https://bcjaeger.github.io/aorsf/articles/aorsf.html","id":"what-about-the-original-orsf","dir":"Articles","previous_headings":"","what":"What about the original ORSF?","title":"Introduction to aorsf","text":"original ORSF (.e., obliqueRSF) used glmnet find linear combinations inputs. aorsf allows users implement approach using orsf_control_survival(method = 'net') function: net forests fit lot faster original ORSF function obliqueRSF. However, net forests still much slower cph ones.","code":"orsf_net <- orsf(data = pbc_orsf, formula = Surv(time, status) ~ . - id, control = orsf_control_survival(method = 'net'))"},{"path":"https://bcjaeger.github.io/aorsf/articles/aorsf.html","id":"aorsf-and-other-machine-learning-software","dir":"Articles","previous_headings":"","what":"aorsf and other machine learning software","title":"Introduction to aorsf","text":"unique feature aorsf fast algorithms fit ORSF ensembles. RLT obliqueRSF fit oblique random survival forests, aorsf faster. ranger randomForestSRC fit survival forests, neither package supports oblique splitting. obliqueRF fits oblique random forests classification regression, survival. PPforest fits oblique random forests classification survival. Note: default prediction behavior aorsf models produce predicted risk specific prediction horizon, default ranger randomForestSRC. think change future, computing time independent predictions aorsf helpful.","code":""},{"path":"https://bcjaeger.github.io/aorsf/articles/aorsf.html","id":"learning-more","dir":"Articles","previous_headings":"","what":"Learning more","title":"Introduction to aorsf","text":"aorsf began dedicated package oblique random survival forests, papers published far focused survival analysis risk prediction. However, routines regression classification oblique RFs aorsf high overlap survival ones. See orsf details oblique random survival forests. see JCGS paper details algorithms used specifically aorsf.","code":""},{"path":"https://bcjaeger.github.io/aorsf/articles/fast.html","id":"go-faster","dir":"Articles","previous_headings":"","what":"Go faster","title":"Tips to speed up computation","text":"Analyses can slow crawl models need hours run. article find tricks prevent bottleneck using orsf().","code":""},{"path":"https://bcjaeger.github.io/aorsf/articles/fast.html","id":"dont-specify-a-control","dir":"Articles","previous_headings":"","what":"Don’t specify a control","title":"Tips to speed up computation","text":"default control orsf() NULL , unspecified, orsf() pick fastest possible control depending type forest grown. default control run-time compared approaches can striking. example:","code":"time_fast <- system.time( expr = orsf(pbc_orsf, formula = time+status~. -id, n_tree = 5) ) time_net <- system.time( expr = orsf(pbc_orsf, formula = time+status~. -id, control = orsf_control_survival(method = 'net'), n_tree = 5) ) # unspecified control is much faster time_net['elapsed'] / time_fast['elapsed'] #> elapsed #> 45.80952"},{"path":"https://bcjaeger.github.io/aorsf/articles/fast.html","id":"use-n_thread","dir":"Articles","previous_headings":"","what":"Use n_thread","title":"Tips to speed up computation","text":"n_thread argument uses multi-threading run aorsf functions parallel possible. know many threads want, e.g. want exactly 5, set n_thread = 5. aren’t sure many threads available want use feasible amount, using n_thread = 0 (default) tells aorsf . Note: sometimes multi-threading possible. 
example, R single threaded language, multi-threading applied orsf() needs call R functions C++, occurs customized R function used find linear combination variables compute prediction accuracy.","code":"# automatically pick number of threads based on amount available orsf(pbc_orsf, formula = time+status~. -id, n_tree = 5, n_thread = 0)"},{"path":"https://bcjaeger.github.io/aorsf/articles/fast.html","id":"do-less","dir":"Articles","previous_headings":"","what":"Do less","title":"Tips to speed up computation","text":"inputs orsf() can adjusted make run faster: set n_retry 0 set oobag_pred_type 'none' set importance 'none' increase split_min_events, split_min_obs, leaf_min_events, leaf_min_obs make trees stop growing sooner increase split_min_stat enforce strict requirements growing deeper trees. Applying tips: modifying inputs can make orsf() run faster, can also impact prediction accuracy.","code":"orsf(pbc_orsf, formula = time+status~., n_thread = 0, n_tree = 5, n_retry = 0, oobag_pred_type = 'none', importance = 'none', split_min_events = 20, leaf_min_events = 10, split_min_stat = 10)"},{"path":"https://bcjaeger.github.io/aorsf/articles/fast.html","id":"show-progress","dir":"Articles","previous_headings":"","what":"Show progress","title":"Tips to speed up computation","text":"Setting verbose_progress = TRUE doesn’t make anything run faster, can help make feel like things running less slow.","code":"verbose_fit <- orsf(pbc_orsf, formula = time+status~. -id, n_tree = 5, verbose_progress = TRUE) #> Growing trees: 100%. #> Computing predictions: 100%."},{"path":"https://bcjaeger.github.io/aorsf/articles/fast.html","id":"dont-wait--estimate","dir":"Articles","previous_headings":"","what":"Don’t wait. Estimate!","title":"Tips to speed up computation","text":"Instead running model hoping fast, can estimate long specification model take using no_fit = TRUE call orsf().","code":"fit_spec <- orsf(pbc_orsf, formula = time+status~. -id, control = orsf_control_survival(method = 'net'), n_tree = 2000, no_fit = TRUE) # how much time it takes to estimate training time: system.time( time_est <- orsf_time_to_train(fit_spec, n_tree_subset = 5) ) #> user system elapsed #> 0.267 0.001 0.267 # the estimated training time: time_est #> Time difference of 106.6964 secs"},{"path":"https://bcjaeger.github.io/aorsf/articles/oobag.html","id":"out-of-bag-data","dir":"Articles","previous_headings":"","what":"Out-of-bag data","title":"Out-of-bag predictions and evaluation","text":"random forests, tree grown bootstrapped version training set. bootstrap samples selected replacement, bootstrapped training set contains two-thirds instances original training set. ‘--bag’ data instances bootstrapped training set.","code":""},{"path":"https://bcjaeger.github.io/aorsf/articles/oobag.html","id":"out-of-bag-predictions-and-error","dir":"Articles","previous_headings":"","what":"Out-of-bag predictions and error","title":"Out-of-bag predictions and evaluation","text":"tree random forest can make predictions --bag data, --bag predictions can aggregated make ensemble --bag prediction. Since --bag data used grow tree, accuracy ensemble --bag predictions approximate generalization error random forest. --bag prediction error plays central role routines estimate variable importance, e.g. negation importance. fit oblique random survival forest plot distribution ensemble --bag predictions. 
Next, let’s check --bag accuracy fit: --bag estimate Harrell’s C-index (default method evaluate --bag predictions) 0.7419135.","code":"fit <- orsf(data = pbc_orsf, formula = Surv(time, status) ~ . - id, oobag_pred_type = 'surv', n_tree = 5, oobag_pred_horizon = 2000) hist(fit$pred_oobag, main = 'Out-of-bag survival predictions at t=2,000') # what function is used to evaluate out-of-bag predictions? fit$eval_oobag$stat_type #> [1] \"Harrell's C-index\" # what is the output from this function? fit$eval_oobag$stat_values #> [,1] #> [1,] 0.7419135"},{"path":"https://bcjaeger.github.io/aorsf/articles/oobag.html","id":"monitoring-out-of-bag-error","dir":"Articles","previous_headings":"","what":"Monitoring out-of-bag error","title":"Out-of-bag predictions and evaluation","text":"--bag data set contains one-third training set, --bag error estimate usually converges stable value trees added forest. want monitor convergence --bag error oblique random survival forest, can set oobag_eval_every compute --bag error every oobag_eval_every tree. example, let’s compute --bag error fitting tree forest 50 trees: general, least 500 trees recommended random forest fit. ’re just using 10 illustration.","code":"fit <- orsf(data = pbc_orsf, formula = Surv(time, status) ~ . - id, n_tree = 20, tree_seeds = 2, oobag_pred_type = 'surv', oobag_pred_horizon = 2000, oobag_eval_every = 1) plot( x = seq(1, 20, by = 1), y = fit$eval_oobag$stat_values, main = 'Out-of-bag C-statistic computed after each new tree is grown.', xlab = 'Number of trees grown', ylab = fit$eval_oobag$stat_type ) lines(x=seq(1, 20), y = fit$eval_oobag$stat_values)"},{"path":"https://bcjaeger.github.io/aorsf/articles/oobag.html","id":"user-supplied-out-of-bag-evaluation-functions","dir":"Articles","previous_headings":"","what":"User-supplied out-of-bag evaluation functions","title":"Out-of-bag predictions and evaluation","text":"cases, may want control --bag error estimated. example, let’s use Brier score SurvMetrics package: two ways apply function compute --bag error. First, can apply function --bag survival predictions stored ‘aorsf’ objects, e.g: Second, can pass function orsf(), used place Harrell’s C-statistic:","code":"oobag_brier_surv <- function(y_mat, w_vec, s_vec){ # use if SurvMetrics is available if(requireNamespace(\"SurvMetrics\")){ return( # output is numeric vector of length 1 as.numeric( SurvMetrics::Brier( object = Surv(time = y_mat[, 1], event = y_mat[, 2]), pre_sp = s_vec, # t_star in Brier() should match oob_pred_horizon in orsf() t_star = 2000 ) ) ) } # if not available, use a dummy version mean( (y_mat[,2] - (1-s_vec))^2 ) } oobag_brier_surv(y_mat = pbc_orsf[,c('time', 'status')], s_vec = fit$pred_oobag) #> Loading required namespace: SurvMetrics #> [1] 0.11869 # instead of copy/pasting the modeling code and then modifying it, # you can just use orsf_update. 
fit_brier <- orsf_update(fit, oobag_fun = oobag_brier_surv) plot( x = seq(1, 20, by = 1), y = fit_brier$eval_oobag$stat_values, main = 'Out-of-bag error computed after each new tree is grown.', sub = 'For the Brier score, lower values indicate more accurate predictions', xlab = 'Number of trees grown', ylab = \"Brier score\" ) lines(x=seq(1, 20), y = fit_brier$eval_oobag$stat_values)"},{"path":"https://bcjaeger.github.io/aorsf/articles/oobag.html","id":"specific-instructions-on-user-supplied-functions","dir":"Articles","previous_headings":"User-supplied out-of-bag evaluation functions","what":"Specific instructions on user-supplied functions","title":"Out-of-bag predictions and evaluation","text":"use oobag_fun note following: oobag_fun three inputs: y_mat, w_vec, s_vec survival trees, y_mat two column matrix first column named ‘time’ second named ‘status’. classification trees, y_mat matrix number columns = number distinct classes outcome. regression, y_mat matrix one column. s_vec numeric vector containing predictions oobag_fun return numeric output length 1","code":""},{"path":"https://bcjaeger.github.io/aorsf/articles/oobag.html","id":"notes","dir":"Articles","previous_headings":"","what":"Notes","title":"Out-of-bag predictions and evaluation","text":"evaluating --bag error: oobag_pred_horizon input orsf() determines prediction horizon --bag predictions. prediction horizon needs specified evaluate prediction accuracy cases, examples . sure check case using functions, , sure oobag_pred_horizon matches prediction horizon used custom function. functions expect predicted risk (.e., 1 - predicted survival), others expect predicted survival.","code":""},{"path":"https://bcjaeger.github.io/aorsf/articles/pd.html","id":"partial-dependence-pd","dir":"Articles","previous_headings":"","what":"Partial dependence (PD)","title":"PD and ICE curves with ORSF","text":"Partial dependence (PD) shows expected prediction model function single predictor multiple predictors. expectation marginalized values predictors, giving something like multivariable adjusted estimate model’s prediction. can compute PD individual conditional expectation (ICE) three ways: using -bag predictions training data. -bag PD indicates relationships model learned training. helpful goal interpret model. using --bag predictions training data. --bag PD indicates relationships model learned training using --bag data simulates application model new data. helpful want test model’s reliability fairness new data don’t access large testing set. using predictions new set data. New data PD shows model predicts outcomes observations seen. helpful want test model’s reliability fairness.","code":"library(aorsf) library(ggplot2)"},{"path":"https://bcjaeger.github.io/aorsf/articles/pd.html","id":"classification","dir":"Articles","previous_headings":"Partial dependence (PD)","what":"Classification","title":"PD and ICE curves with ORSF","text":"Begin fitting oblique classification random forest: Compute PD using --bag data flipper_length_mm = c(190, 210). Note predicted probabilities returned class probabilities mean column sum 1 take sum class specific value pred_spec variables. example, isn’t case median predicted probability!","code":"set.seed(329) index_train <- sample(nrow(penguins_orsf), 150) penguins_orsf_train <- penguins_orsf[index_train, ] penguins_orsf_test <- penguins_orsf[-index_train, ] fit_clsf <- orsf(data = penguins_orsf_train, formula = species ~ .) 
pred_spec <- list(flipper_length_mm = c(190, 210)) pd_oob <- orsf_pd_oob(fit_clsf, pred_spec = pred_spec) pd_oob #> Key: #> class flipper_length_mm mean lwr medn upr #> #> 1: Adelie 190 0.6182417 0.206899034 0.75537171 0.9796439 #> 2: Adelie 210 0.4348386 0.019519733 0.56802082 0.8620694 #> 3: Chinstrap 190 0.2114905 0.018420139 0.15561560 0.7174734 #> 4: Chinstrap 210 0.1806274 0.020409141 0.09928047 0.6990198 #> 5: Gentoo 190 0.1702678 0.001281382 0.02830728 0.5733438 #> 6: Gentoo 210 0.3845340 0.072260715 0.20258335 0.9519486 sum(pd_oob[flipper_length_mm == 190, mean]) #> [1] 1 sum(pd_oob[flipper_length_mm == 190, medn]) #> [1] 0.9392946"},{"path":"https://bcjaeger.github.io/aorsf/articles/pd.html","id":"regression","dir":"Articles","previous_headings":"Partial dependence (PD)","what":"Regression","title":"PD and ICE curves with ORSF","text":"Begin fitting oblique regression random forest: Compute PD using new data flipper_length_mm = c(190, 210). can also let pred_spec_auto pick reasonable values like : default, combinations variables used. However, can also look variables one one, separately, like : can also bypass bells whistles using data.frame pred_spec. (Just make sure request values exist training data.)","code":"set.seed(329) index_train <- sample(nrow(penguins_orsf), 150) penguins_orsf_train <- penguins_orsf[index_train, ] penguins_orsf_test <- penguins_orsf[-index_train, ] fit_regr <- orsf(data = penguins_orsf_train, formula = bill_length_mm ~ .) pred_spec <- list(flipper_length_mm = c(190, 210)) pd_new <- orsf_pd_new(fit_regr, pred_spec = pred_spec, new_data = penguins_orsf_test) pd_new #> flipper_length_mm mean lwr medn upr #> #> 1: 190 42.96571 37.09805 43.69769 48.72301 #> 2: 210 45.66012 40.50693 46.31577 51.65163 pred_spec = pred_spec_auto(species, island, body_mass_g) pd_new <- orsf_pd_new(fit_regr, pred_spec = pred_spec, new_data = penguins_orsf_test) pd_new #> species island body_mass_g mean lwr medn upr #> #> 1: Adelie Biscoe 3200 40.31374 37.24373 40.31967 44.22824 #> 2: Chinstrap Biscoe 3200 45.10582 42.63342 45.10859 47.60119 #> 3: Gentoo Biscoe 3200 42.81649 40.19221 42.55664 46.84035 #> 4: Adelie Dream 3200 40.16219 36.95895 40.34633 43.90681 #> 5: Chinstrap Dream 3200 46.21778 43.53954 45.90929 49.19173 #> 6: Gentoo Dream 3200 42.60465 39.89647 42.63520 46.28769 #> 7: Adelie Torgersen 3200 39.91652 36.80227 39.79806 43.68842 #> 8: Chinstrap Torgersen 3200 44.27807 41.95470 44.40742 46.68848 #> 9: Gentoo Torgersen 3200 42.09510 39.49863 41.80049 45.81833 #> 10: Adelie Biscoe 3550 40.77971 38.04027 40.59561 44.57505 #> 11: Chinstrap Biscoe 3550 45.81304 43.52102 45.73116 48.36366 #> 12: Gentoo Biscoe 3550 43.31233 40.77355 43.03077 47.22936 #> 13: Adelie Dream 3550 40.77741 38.07399 40.78175 44.37273 #> 14: Chinstrap Dream 3550 47.30926 44.80493 46.77540 50.47092 #> 15: Gentoo Dream 3550 43.26955 40.86119 43.16204 46.89190 #> 16: Adelie Torgersen 3550 40.25780 37.35251 40.07871 44.04576 #> 17: Chinstrap Torgersen 3550 44.77911 42.60161 44.81944 47.14986 #> 18: Gentoo Torgersen 3550 42.49520 39.95866 42.14160 46.26237 #> 19: Adelie Biscoe 3975 41.61744 38.94515 41.36634 45.38752 #> 20: Chinstrap Biscoe 3975 46.59363 44.59970 46.44923 49.11457 #> 21: Gentoo Biscoe 3975 44.07857 41.60792 43.74562 47.85109 #> 22: Adelie Dream 3975 41.50511 39.06187 41.24741 45.13027 #> 23: Chinstrap Dream 3975 48.14978 45.87390 47.54867 51.50683 #> 24: Gentoo Dream 3975 44.01928 41.70577 43.84099 47.50470 #> 25: Adelie Torgersen 3975 40.94764 38.12519 40.66759 44.73689 #> 26: 
Chinstrap Torgersen 3975 45.44820 43.49986 45.44036 47.63243 #> 27: Gentoo Torgersen 3975 43.13791 40.70628 42.70627 46.87306 #> 28: Adelie Biscoe 4700 42.93914 40.48463 42.44768 46.81756 #> 29: Chinstrap Biscoe 4700 47.18517 45.40866 47.07739 49.55747 #> 30: Gentoo Biscoe 4700 45.32541 43.08173 44.93498 49.23391 #> 31: Adelie Dream 4700 42.73806 40.44229 42.22226 46.49936 #> 32: Chinstrap Dream 4700 48.37278 46.34335 48.00781 51.18955 #> 33: Gentoo Dream 4700 45.09132 42.88328 44.79530 48.82180 #> 34: Adelie Torgersen 4700 42.09349 39.72074 41.56168 45.68838 #> 35: Chinstrap Torgersen 4700 46.16807 44.38410 46.09525 48.35127 #> 36: Gentoo Torgersen 4700 44.31621 42.18968 43.81773 47.98024 #> 37: Adelie Biscoe 5300 43.89769 41.43335 43.28504 48.10892 #> 38: Chinstrap Biscoe 5300 47.53721 45.66038 47.52770 49.88701 #> 39: Gentoo Biscoe 5300 46.16115 43.81722 45.59309 50.57469 #> 40: Adelie Dream 5300 43.59846 41.25825 43.24518 47.46193 #> 41: Chinstrap Dream 5300 48.48139 46.36282 48.25679 51.02996 #> 42: Gentoo Dream 5300 45.91819 43.62832 45.54110 49.91622 #> 43: Adelie Torgersen 5300 42.92879 40.66576 42.31072 46.76406 #> 44: Chinstrap Torgersen 5300 46.59576 44.80400 46.49196 49.03906 #> 45: Gentoo Torgersen 5300 45.11384 42.95190 44.51289 49.27629 #> species island body_mass_g mean lwr medn upr pd_new <- orsf_pd_new(fit_regr, expand_grid = FALSE, pred_spec = pred_spec, new_data = penguins_orsf_test) pd_new #> variable value level mean lwr medn upr #> #> 1: species NA Adelie 41.90271 37.10417 41.51723 48.51478 #> 2: species NA Chinstrap 47.11314 42.40419 46.96478 51.51392 #> 3: species NA Gentoo 44.37038 39.87306 43.89889 51.21635 #> 4: island NA Biscoe 44.21332 37.22711 45.27862 51.21635 #> 5: island NA Dream 44.43354 37.01471 45.57261 51.51392 #> 6: island NA Torgersen 43.29539 37.01513 44.26924 49.84391 #> 7: body_mass_g 3200 42.84625 37.03978 43.95991 49.19173 #> 8: body_mass_g 3550 43.53326 37.56730 44.43756 50.47092 #> 9: body_mass_g 3975 44.30431 38.31567 45.22089 51.50683 #> 10: body_mass_g 4700 45.22525 39.88199 46.34680 51.18955 #> 11: body_mass_g 5300 45.91412 40.84742 46.95327 51.48851 custom_pred_spec <- data.frame(species = 'Adelie', island = 'Biscoe') pd_new <- orsf_pd_new(fit_regr, pred_spec = custom_pred_spec, new_data = penguins_orsf_test) pd_new #> species island mean lwr medn upr #> #> 1: Adelie Biscoe 41.98024 37.22711 41.65252 48.51478"},{"path":"https://bcjaeger.github.io/aorsf/articles/pd.html","id":"survival","dir":"Articles","previous_headings":"Partial dependence (PD)","what":"Survival","title":"PD and ICE curves with ORSF","text":"Begin fitting oblique survival random forest: Compute PD using -bag data bili = c(1,2,3,4,5): don’t specific values variable mind, let pred_spec_auto pick : Specify pred_horizon get PD value:","code":"set.seed(329) index_train <- sample(nrow(pbc_orsf), 150) pbc_orsf_train <- pbc_orsf[index_train, ] pbc_orsf_test <- pbc_orsf[-index_train, ] fit_surv <- orsf(data = pbc_orsf_train, formula = Surv(time, status) ~ . 
- id, oobag_pred_horizon = 365.25 * 5) pd_train <- orsf_pd_inb(fit_surv, pred_spec = list(bili = 1:5)) pd_train #> pred_horizon bili mean lwr medn upr #> #> 1: 1826.25 1 0.2575450 0.02234786 0.1334170 0.8917942 #> 2: 1826.25 2 0.3130469 0.06853733 0.1906695 0.9203372 #> 3: 1826.25 3 0.3711963 0.11409793 0.2582027 0.9416791 #> 4: 1826.25 4 0.4248968 0.15648381 0.3334579 0.9591581 #> 5: 1826.25 5 0.4671699 0.20123406 0.3855137 0.9655296 pd_train <- orsf_pd_inb(fit_surv, pred_spec_auto(bili)) pd_train #> pred_horizon bili mean lwr medn upr #> #> 1: 1826.25 0.590 0.2493753 0.02035041 0.1250263 0.8823385 #> 2: 1826.25 0.725 0.2517103 0.02060111 0.1281814 0.8836536 #> 3: 1826.25 1.500 0.2807082 0.03964900 0.1601715 0.9040617 #> 4: 1826.25 3.500 0.3968251 0.13431288 0.2934565 0.9501230 #> 5: 1826.25 7.210 0.5352155 0.27869513 0.4658256 0.9782084 pd_train <- orsf_pd_inb(fit_surv, pred_spec_auto(bili), pred_horizon = seq(500, 3000, by = 500)) pd_train #> pred_horizon bili mean lwr medn upr #> #> 1: 500 0.590 0.06217164 0.0004433990 0.008765301 0.5918852 #> 2: 1000 0.590 0.14282695 0.0057937418 0.056509484 0.7381953 #> 3: 1500 0.590 0.20944972 0.0136094784 0.092379507 0.8577223 #> 4: 2000 0.590 0.26917477 0.0230476894 0.146421502 0.8918696 #> 5: 2500 0.590 0.31901518 0.0631155452 0.203673185 0.9034059 #> 6: 3000 0.590 0.39244000 0.0911566314 0.302726475 0.9239494 #> 7: 500 0.725 0.06287876 0.0004462367 0.009001904 0.5980510 #> 8: 1000 0.725 0.14409310 0.0063321712 0.056833294 0.7448126 #> 9: 1500 0.725 0.21143724 0.0140736894 0.093685200 0.8597396 #> 10: 2000 0.725 0.27150368 0.0235448705 0.147022224 0.8940497 #> 11: 2500 0.725 0.32014805 0.0626303822 0.203946002 0.9073003 #> 12: 3000 0.725 0.39518173 0.0911457406 0.308428469 0.9252028 #> 13: 500 1.500 0.06712295 0.0012717884 0.011028398 0.6240769 #> 14: 1000 1.500 0.15802582 0.0114789623 0.068332010 0.7683888 #> 15: 1500 1.500 0.23407183 0.0287320952 0.117289745 0.8789647 #> 16: 2000 1.500 0.30235436 0.0467927208 0.180096425 0.9143235 #> 17: 2500 1.500 0.35354874 0.0845866747 0.238415966 0.9265099 #> 18: 3000 1.500 0.43604287 0.1311103304 0.348078730 0.9438196 #> 19: 500 3.500 0.08677320 0.0052087533 0.028244374 0.6741102 #> 20: 1000 3.500 0.22427808 0.0519179775 0.139857107 0.8277541 #> 21: 1500 3.500 0.32788654 0.0901983241 0.217982772 0.9371150 #> 22: 2000 3.500 0.41708208 0.1445328597 0.313224605 0.9566091 #> 23: 2500 3.500 0.49334883 0.2195110942 0.402932569 0.9636221 #> 24: 3000 3.500 0.56094391 0.2647541788 0.503509668 0.9734948 #> 25: 500 7.210 0.12591911 0.0220920570 0.063283130 0.7522611 #> 26: 1000 7.210 0.32642477 0.1353851175 0.259731888 0.8879218 #> 27: 1500 7.210 0.46409472 0.2181840827 0.387142510 0.9700903 #> 28: 2000 7.210 0.55116942 0.2912654769 0.484118150 0.9811496 #> 29: 2500 7.210 0.62008114 0.3709845684 0.568822502 0.9844945 #> 30: 3000 7.210 0.68030697 0.4247511750 0.646009789 0.9888637 #> pred_horizon bili mean lwr medn upr"},{"path":"https://bcjaeger.github.io/aorsf/articles/pd.html","id":"one-variable-moving-horizon","dir":"Articles","previous_headings":"Partial dependence (PD)","what":"One variable, moving horizon","title":"PD and ICE curves with ORSF","text":"next sections, update orsf_fit include data pbc_orsf instead just training sample: effect predictor varies time? Partial dependence can show . inspection, can see males higher risk females difference risk grows time. can also seen viewing ratio expected risk time: get view PD number variables training data, use orsf_summarize_uni(). 
function computes --bag PD important n_variables returns nicely formatted view output: ‘summary’ object can converted data.table downstream plotting tables.","code":"# a rare case of modify_in_place = TRUE orsf_update(fit_surv, data = pbc_orsf, modify_in_place = TRUE) fit_surv #> ---------- Oblique random survival forest #> #> Linear combinations: Accelerated Cox regression #> N observations: 276 #> N events: 111 #> N trees: 500 #> N predictors total: 17 #> N predictors per node: 5 #> Average leaves per tree: 21.038 #> Min observations in leaf: 5 #> Min events in leaf: 1 #> OOB stat value: 0.84 #> OOB stat type: Harrell's C-index #> Variable importance: anova #> #> ----------------------------------------- pd_sex_tv <- orsf_pd_oob(fit_surv, pred_spec = pred_spec_auto(sex), pred_horizon = seq(365, 365*5)) ggplot(pd_sex_tv) + aes(x = pred_horizon, y = mean, color = sex) + geom_line() + labs(x = 'Time since baseline', y = 'Expected risk') library(data.table) ratio_tv <- pd_sex_tv[ , .(ratio = mean[sex == 'm'] / mean[sex == 'f']), by = pred_horizon ] ggplot(ratio_tv, aes(x = pred_horizon, y = ratio)) + geom_line(color = 'grey') + geom_smooth(color = 'black', se = FALSE) + labs(x = 'time since baseline', y = 'ratio in expected risk for males versus females') pd_smry <- orsf_summarize_uni(fit_surv, n_variables = 4) pd_smry #> #> -- ascites (VI Rank: 1) ------------------------- #> #> |---------------- Risk ----------------| #> Value Mean Median 25th % 75th % #> #> 0 0.3083328 0.1985589 0.06581247 0.5241336 #> 1 0.4702396 0.3975953 0.27481738 0.6564321 #> #> -- bili (VI Rank: 2) ---------------------------- #> #> |---------------- Risk ----------------| #> Value Mean Median 25th % 75th % #> #> 0.60 0.2356543 0.1536301 0.05872720 0.3719578 #> 0.80 0.2398021 0.1609720 0.06167673 0.3776136 #> 1.40 0.2613612 0.1809950 0.07893386 0.4064484 #> 3.52 0.3702763 0.3118827 0.17050712 0.5447088 #> 7.25 0.4780580 0.4406202 0.29442977 0.6434075 #> #> -- edema (VI Rank: 3) --------------------------- #> #> |---------------- Risk ----------------| #> Value Mean Median 25th % 75th % #> #> 0 0.3035731 0.1840849 0.06509174 0.5228237 #> 0.5 0.3558716 0.2649457 0.11132293 0.5831396 #> 1 0.4693915 0.3961470 0.28211662 0.6331870 #> #> -- copper (VI Rank: 4) -------------------------- #> #> |---------------- Risk ----------------| #> Value Mean Median 25th % 75th % #> #> 25.5 0.2632768 0.1622871 0.05581251 0.4308234 #> 42.8 0.2707739 0.1703028 0.05887747 0.4418590 #> 74.0 0.2908707 0.1940176 0.07155433 0.4768302 #> 129 0.3444258 0.2651729 0.11918406 0.5574967 #> 214 0.4245218 0.3577346 0.21408331 0.6238041 #> #> Predicted risk at time t = 1826.25 for top 4 predictors head(as.data.table(pd_smry)) #> variable importance Value Mean Median 25th % 75th % #> #> 1: ascites 0.4960630 0 0.3083328 0.1985589 0.06581247 0.5241336 #> 2: ascites 0.4960630 1 0.4702396 0.3975953 0.27481738 0.6564321 #> 3: bili 0.4160074 0.60 0.2356543 0.1536301 0.05872720 0.3719578 #> 4: bili 0.4160074 0.80 0.2398021 0.1609720 0.06167673 0.3776136 #> 5: bili 0.4160074 1.40 0.2613612 0.1809950 0.07893386 0.4064484 #> 6: bili 0.4160074 3.52 0.3702763 0.3118827 0.17050712 0.5447088 #> pred_horizon level #> #> 1: 1826.25 0 #> 2: 1826.25 1 #> 3: 1826.25 #> 4: 1826.25 #> 5: 1826.25 #> 6: 1826.25 "},{"path":"https://bcjaeger.github.io/aorsf/articles/pd.html","id":"multiple-variables-jointly","dir":"Articles","previous_headings":"Partial dependence (PD)","what":"Multiple variables, jointly","title":"PD and ICE curves with ORSF","text":"Partial dependence can 
show expected value model’s predictions function specific predictor, function multiple predictors. instance, can estimate predicted risk joint function bili, edema, trt: inspection, model’s predictions indicate slightly lower risk placebo group, seem change much different values bili edema. clear increase predicted risk higher levels edema higher levels bili slope predicted risk function bili appears highest among patients edema 0.5. effect bili modified edema 0.5? quick sanity check coxph suggests .","code":"pred_spec = pred_spec_auto(bili, edema, trt) pd_bili_edema <- orsf_pd_oob(fit_surv, pred_spec) ggplot(pd_bili_edema) + aes(x = bili, y = medn, col = trt, linetype = edema) + geom_line() + labs(y = 'Expected predicted risk') library(survival) pbc_orsf$edema_05 <- ifelse(pbc_orsf$edema == '0.5', 'yes', 'no') fit_cph <- coxph(Surv(time,status) ~ edema_05 * bili, data = pbc_orsf) anova(fit_cph) #> Analysis of Deviance Table #> Cox model: response is Surv(time, status) #> Terms added sequentially (first to last) #> #> loglik Chisq Df Pr(>|Chi|) #> NULL -550.19 #> edema_05 -546.83 6.7248 1 0.009508 ** #> bili -513.59 66.4689 1 3.555e-16 *** #> edema_05:bili -510.54 6.1112 1 0.013433 * #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1"},{"path":"https://bcjaeger.github.io/aorsf/articles/pd.html","id":"find-interactions-using-pd","dir":"Articles","previous_headings":"Partial dependence (PD)","what":"Find interactions using PD","title":"PD and ICE curves with ORSF","text":"Random forests good using interactions, less good telling . Use orsf_vint() apply method variable interaction scoring PD described Greenwell et al (2018). can take little lots predictors, seems work best continuous continuous interactions. Interactions categorical variables sometimes - - scored. scores include partial dependence values can pull plot: use sanity check coxph see interactions detected using standard test: Note: Caution warranted interpreting statistical hypotheses motivated data tested . 
Results like p-values interaction shown interpreted exploratory.","code":"# use just the continuous variables preds <- names(fit_surv$get_means()) vint_scores <- orsf_vint(fit_surv, predictors = preds) vint_scores #> interaction score pd_values #> #> 1: albumin..protime 1.15973071 #> 2: copper..protime 0.79587419 #> 3: bili..chol 0.74163213 #> 4: age..bili 0.74097713 #> 5: bili..copper 0.71610872 #> 6: bili..albumin 0.67849272 #> 7: bili..protime 0.59576252 #> 8: albumin..ast 0.59439149 #> 9: bili..platelet 0.56627946 #> 10: ast..protime 0.56220910 #> 11: albumin..copper 0.54057277 #> 12: bili..trig 0.52794450 #> 13: copper..trig 0.50661291 #> 14: age..protime 0.45818900 #> 15: age..ast 0.44410913 #> 16: age..platelet 0.42607794 #> 17: albumin..platelet 0.41293884 #> 18: chol..albumin 0.39547725 #> 19: platelet..protime 0.38674364 #> 20: age..copper 0.36230121 #> 21: copper..ast 0.35089611 #> 22: trig..protime 0.29339926 #> 23: bili..alk.phos 0.25729691 #> 24: chol..protime 0.24424042 #> 25: copper..alk.phos 0.22156162 #> 26: bili..ast 0.21483757 #> 27: chol..trig 0.20737852 #> 28: trig..platelet 0.18819009 #> 29: age..alk.phos 0.17844523 #> 30: chol..copper 0.17025610 #> 31: copper..platelet 0.16009542 #> 32: age..albumin 0.15186211 #> 33: alk.phos..trig 0.14212275 #> 34: age..trig 0.12185330 #> 35: albumin..alk.phos 0.12061152 #> 36: chol..ast 0.10767371 #> 37: chol..alk.phos 0.10712377 #> 38: ast..platelet 0.09157413 #> 39: alk.phos..protime 0.08277287 #> 40: alk.phos..ast 0.08062752 #> 41: ast..trig 0.07157470 #> 42: age..chol 0.05564449 #> 43: chol..platelet 0.04813670 #> 44: alk.phos..platelet 0.04760897 #> 45: albumin..trig 0.04689324 #> interaction score pd_values # top scoring interaction pd_top <- vint_scores$pd_values[[1]] # center pd values so it's easier to see the interaction effect pd_top[, mean := mean - mean[1], by = var_2_value] ggplot(pd_top) + aes(x = var_1_value, y = mean, color = factor(var_2_value), group = factor(var_2_value)) + geom_line() + labs(x = \"albumin\", y = \"predicted mortality (centered)\", color = \"protime\") # test the top score (expect strong interaction) fit_cph <- coxph(Surv(time,status) ~ albumin * protime, data = pbc_orsf) anova(fit_cph) #> Analysis of Deviance Table #> Cox model: response is Surv(time, status) #> Terms added sequentially (first to last) #> #> loglik Chisq Df Pr(>|Chi|) #> NULL -550.19 #> albumin -526.29 47.801 1 4.717e-12 *** #> protime -514.89 22.806 1 1.792e-06 *** #> albumin:protime -511.76 6.252 1 0.01241 * #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1"},{"path":"https://bcjaeger.github.io/aorsf/articles/pd.html","id":"individual-conditional-expectations-ice","dir":"Articles","previous_headings":"","what":"Individual conditional expectations (ICE)","title":"PD and ICE curves with ORSF","text":"Unlike partial dependence, shows expected prediction function one multiple predictors, individual conditional expectations (ICE) show prediction individual observation function predictor.","code":""},{"path":"https://bcjaeger.github.io/aorsf/articles/pd.html","id":"classification-1","dir":"Articles","previous_headings":"Individual conditional expectations (ICE)","what":"Classification","title":"PD and ICE curves with ORSF","text":"Compute ICE using --bag data flipper_length_mm = c(190, 210). two identifiers output: id_variable identifier current value variable(s) data. redundant one variable, helpful multiple variables. id_row identifier observation original data. 
Note predicted probabilities returned class observation data. Predicted probabilities given observation given variable value sum 1. example,","code":"pred_spec <- list(flipper_length_mm = c(190, 210)) ice_oob <- orsf_ice_oob(fit_clsf, pred_spec = pred_spec) ice_oob #> Key: #> id_variable id_row class flipper_length_mm pred #> #> 1: 1 1 Adelie 190 0.92045213 #> 2: 1 2 Adelie 190 0.80427932 #> 3: 1 3 Adelie 190 0.84342550 #> 4: 1 4 Adelie 190 0.93514694 #> 5: 1 5 Adelie 190 0.97172229 #> --- #> 896: 2 146 Gentoo 210 0.25779089 #> 897: 2 147 Gentoo 210 0.04806888 #> 898: 2 148 Gentoo 210 0.07926342 #> 899: 2 149 Gentoo 210 0.84597108 #> 900: 2 150 Gentoo 210 0.10191162 ice_oob %>% .[flipper_length_mm == 190] %>% .[id_row == 1] %>% .[['pred']] %>% sum() #> [1] 1"},{"path":"https://bcjaeger.github.io/aorsf/articles/pd.html","id":"regression-1","dir":"Articles","previous_headings":"Individual conditional expectations (ICE)","what":"Regression","title":"PD and ICE curves with ORSF","text":"Compute ICE using new data flipper_length_mm = c(190, 210). can also let pred_spec_auto pick reasonable values like : default, combinations variables used. However, can also look variables one one, separately, like : can also bypass bells whistles using data.frame pred_spec. (Just make sure request values exist training data.)","code":"pred_spec <- list(flipper_length_mm = c(190, 210)) ice_new <- orsf_ice_new(fit_regr, pred_spec = pred_spec, new_data = penguins_orsf_test) ice_new #> id_variable id_row flipper_length_mm pred #> #> 1: 1 1 190 37.94483 #> 2: 1 2 190 37.61595 #> 3: 1 3 190 37.53681 #> 4: 1 4 190 39.49476 #> 5: 1 5 190 38.95635 #> --- #> 362: 2 179 210 51.80471 #> 363: 2 180 210 47.27183 #> 364: 2 181 210 47.05031 #> 365: 2 182 210 50.39028 #> 366: 2 183 210 48.44774 pred_spec = pred_spec_auto(species, island, body_mass_g) ice_new <- orsf_ice_new(fit_regr, pred_spec = pred_spec, new_data = penguins_orsf_test) ice_new #> id_variable id_row species island body_mass_g pred #> #> 1: 1 1 Adelie Biscoe 3200 37.78339 #> 2: 1 2 Adelie Biscoe 3200 37.73273 #> 3: 1 3 Adelie Biscoe 3200 37.71248 #> 4: 1 4 Adelie Biscoe 3200 40.25782 #> 5: 1 5 Adelie Biscoe 3200 40.04074 #> --- #> 8231: 45 179 Gentoo Torgersen 5300 46.14559 #> 8232: 45 180 Gentoo Torgersen 5300 43.98050 #> 8233: 45 181 Gentoo Torgersen 5300 44.59837 #> 8234: 45 182 Gentoo Torgersen 5300 44.85146 #> 8235: 45 183 Gentoo Torgersen 5300 44.23710 ice_new <- orsf_ice_new(fit_regr, expand_grid = FALSE, pred_spec = pred_spec, new_data = penguins_orsf_test) ice_new #> id_variable id_row variable value level pred #> #> 1: 1 1 species NA Adelie 37.74136 #> 2: 1 2 species NA Adelie 37.42367 #> 3: 1 3 species NA Adelie 37.04598 #> 4: 1 4 species NA Adelie 39.89602 #> 5: 1 5 species NA Adelie 39.14848 #> --- #> 2009: 5 179 body_mass_g 5300 51.50196 #> 2010: 5 180 body_mass_g 5300 47.27055 #> 2011: 5 181 body_mass_g 5300 48.34064 #> 2012: 5 182 body_mass_g 5300 48.75828 #> 2013: 5 183 body_mass_g 5300 48.11020 custom_pred_spec <- data.frame(species = 'Adelie', island = 'Biscoe') ice_new <- orsf_ice_new(fit_regr, pred_spec = custom_pred_spec, new_data = penguins_orsf_test) ice_new #> id_variable id_row species island pred #> #> 1: 1 1 Adelie Biscoe 38.52327 #> 2: 1 2 Adelie Biscoe 38.32073 #> 3: 1 3 Adelie Biscoe 37.71248 #> 4: 1 4 Adelie Biscoe 41.68380 #> 5: 1 5 Adelie Biscoe 40.91140 #> --- #> 179: 1 179 Adelie Biscoe 43.09493 #> 180: 1 180 Adelie Biscoe 38.79455 #> 181: 1 181 Adelie Biscoe 39.37734 #> 182: 1 182 Adelie Biscoe 40.71952 #> 183: 1 183 Adelie 
Biscoe 39.34501"},{"path":"https://bcjaeger.github.io/aorsf/articles/pd.html","id":"survival-1","dir":"Articles","previous_headings":"Individual conditional expectations (ICE)","what":"Survival","title":"PD and ICE curves with ORSF","text":"Compute ICE using -bag data bili = c(1,2,3,4,5): don’t specific values variable mind, let pred_spec_auto pick : Specify pred_horizon get ICE value: Multi-prediction horizon ice comes minimal extra computational cost. Use fine grid time values assess whether predictors time-varying effects.","code":"ice_train <- orsf_ice_inb(fit_surv, pred_spec = list(bili = 1:5)) ice_train #> id_variable id_row pred_horizon bili pred #> #> 1: 1 1 1826.25 1 0.9015162 #> 2: 1 2 1826.25 1 0.1019426 #> 3: 1 3 1826.25 1 0.6821646 #> 4: 1 4 1826.25 1 0.3623411 #> 5: 1 5 1826.25 1 0.1374271 #> --- #> 1376: 5 272 1826.25 5 0.2650957 #> 1377: 5 273 1826.25 5 0.3065318 #> 1378: 5 274 1826.25 5 0.3503776 #> 1379: 5 275 1826.25 5 0.1652897 #> 1380: 5 276 1826.25 5 0.3549165 ice_train <- orsf_ice_inb(fit_surv, pred_spec_auto(bili)) ice_train #> id_variable id_row pred_horizon bili pred #> #> 1: 1 1 1826.25 0.60 0.89210440 #> 2: 1 2 1826.25 0.60 0.09186876 #> 3: 1 3 1826.25 0.60 0.65503431 #> 4: 1 4 1826.25 0.60 0.34622748 #> 5: 1 5 1826.25 0.60 0.13310425 #> --- #> 1376: 5 272 1826.25 7.25 0.31258148 #> 1377: 5 273 1826.25 7.25 0.35478676 #> 1378: 5 274 1826.25 7.25 0.41559176 #> 1379: 5 275 1826.25 7.25 0.25301890 #> 1380: 5 276 1826.25 7.25 0.44533769 ice_train <- orsf_ice_inb(fit_surv, pred_spec_auto(bili), pred_horizon = seq(500, 3000, by = 500)) ice_train #> id_variable id_row pred_horizon bili pred #> #> 1: 1 1 500 0.60 0.5949598 #> 2: 1 1 1000 0.60 0.7652137 #> 3: 1 1 1500 0.60 0.8751746 #> 4: 1 1 2000 0.60 0.9057135 #> 5: 1 1 2500 0.60 0.9231915 #> --- #> 8276: 5 276 1000 7.25 0.2111306 #> 8277: 5 276 1500 7.25 0.3642278 #> 8278: 5 276 2000 7.25 0.4850492 #> 8279: 5 276 2500 7.25 0.5720362 #> 8280: 5 276 3000 7.25 0.6206786"},{"path":"https://bcjaeger.github.io/aorsf/articles/pd.html","id":"visualizing-ice-curves","dir":"Articles","previous_headings":"Individual conditional expectations (ICE)","what":"Visualizing ICE curves","title":"PD and ICE curves with ORSF","text":"Inspecting ICE curves observation can help identify whether heterogeneity model’s predictions. .e., effect variable follow pattern data, groups variable impacts risk differently? going turn boundary checking orsf_ice_oob setting boundary_checks = FALSE, allow generate ICE curves go beyond 90th percentile bili. plots, helpful scale ICE data. subtract initial value predicted risk (.e., bili = 1) observation’s conditional expectation values. , Every curve start 0 plot shows change predicted risk function bili. Now can visualize curves. inspection figure, individual slopes cluster around overall trend - Good! small number individual slopes appear flat. 
may helpful investigate .","code":"pred_spec <- list(bili = seq(1, 10, length.out = 25)) ice_oob <- orsf_ice_oob(fit_surv, pred_spec, boundary_checks = FALSE) ice_oob #> id_variable id_row pred_horizon bili pred #> #> 1: 1 1 1826.25 1 0.8790861 #> 2: 1 2 1826.25 1 0.8132035 #> 3: 1 3 1826.25 1 0.6240238 #> 4: 1 4 1826.25 1 0.7461603 #> 5: 1 5 1826.25 1 0.5754091 #> --- #> 6896: 25 272 1826.25 10 0.7018976 #> 6897: 25 273 1826.25 10 0.4606246 #> 6898: 25 274 1826.25 10 0.3347082 #> 6899: 25 275 1826.25 10 0.6046024 #> 6900: 25 276 1826.25 10 0.2789017 ice_oob[, pred_subtract := rep(pred[id_variable==1], times=25)] ice_oob[, pred := pred - pred_subtract] ggplot(ice_oob, aes(x = bili, y = pred, group = id_row)) + geom_line(alpha = 0.15) + labs(y = 'Change in predicted risk') + geom_smooth(se = FALSE, aes(group = 1))"},{"path":"https://bcjaeger.github.io/aorsf/articles/pd.html","id":"limitations-of-pd","dir":"Articles","previous_headings":"Individual conditional expectations (ICE)","what":"Limitations of PD","title":"PD and ICE curves with ORSF","text":"Partial dependence number known limitations assumptions users aware (see Hooker, 2021). particular, partial dependence less intuitive >2 predictors examined jointly, assumed feature(s) partial dependence computed correlated features (likely true many cases). Accumulated local effect plots can used (see ) case feature independence valid assumption.","code":""},{"path":"https://bcjaeger.github.io/aorsf/articles/pd.html","id":"references","dir":"Articles","previous_headings":"Individual conditional expectations (ICE)","what":"References","title":"PD and ICE curves with ORSF","text":"Hooker, Giles, Mentch, Lucas, Zhou, Siyu (2021). “Unrestricted permutation forces extrapolation: variable importance requires least one model, free variable importance.” Statistics Computing, 31, 1-16.","code":""},{"path":"https://bcjaeger.github.io/aorsf/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Byron Jaeger. Author, maintainer. Nicholas Pajewski. Contributor. Sawyer Welden. Contributor. Christopher Jackson. Reviewer. Marvin Wright. Reviewer. Lukas Burk. Reviewer.","code":""},{"path":"https://bcjaeger.github.io/aorsf/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Jaeger et al. (2022). aorsf: R package supervised learning using oblique random survival forest. Journal Open Source Software, 7(77), 4705. https://doi.org/10.21105/joss.04705. Jaeger BC, Welden S, Lenoir K, Speiser JL, Segar MW, Pandey , Pajewski NM. Accelerated interpretable oblique random survival forests. Journal Computational Graphical Statistics. 2023 Aug 3:1-6. Jaeger BC, Long DL, Long DM, Sims M, Szychowski JM, Min YI, Mcclure LA, Howard G, Simon N. Oblique Random Survival Forests. Annals Applied Statistics. 13(3): 1847-1883. URL https://doi.org/10.1214/19-AOAS1261 DOI: 10.1214/19-AOAS1261","code":"@Article{, title = {aorsf: An R package for supervised learning using the oblique random survival forest}, author = {Byron C. Jaeger and Sawyer Welden and Kristin Lenoir and Nicholas M. Pajewski}, journal = {Journal of Open Source Software}, year = {2022}, volume = {7}, number = {77}, pages = {4705}, url = {https://doi.org/10.21105/joss.04705}, } @Article{, title = {Accelerated and interpretable oblique random survival forests}, author = {Byron C. Jaeger and Sawyer Welden and Kristin Lenoir and Jaime L. Speiser and Matthew W. Segar and Ambarish Pandey and Nicholas M. 
Pajewski}, journal = {Journal of Computational and Graphical Statistics}, year = {2023}, url = {https://doi.org/10.1080/10618600.2023.2231048}, } @Article{, title = {Oblique Random Survival Forests}, author = {Byron C. Jaeger and D. Leann Long and Dustin M. Long and Mario Sims and Jeff M. Szychowski and Yuan-I Min and Leslie A. Mcclure and George Howard and Noah Simon}, journal = {Annals of Applied Statistics}, year = {2019}, volume = {13}, number = {3}, pages = {1847--1883}, url = {https://doi.org/10.1214/19-AOAS1261}, }"},{"path":"https://bcjaeger.github.io/aorsf/index.html","id":"aorsf-","dir":"","previous_headings":"","what":"Accelerated Oblique Random Forests","title":"Accelerated Oblique Random Forests","text":"Fit, interpret, make predictions oblique random forests (RFs).","code":""},{"path":"https://bcjaeger.github.io/aorsf/index.html","id":"why-aorsf","dir":"","previous_headings":"","what":"Why aorsf?","title":"Accelerated Oblique Random Forests","text":"Fast versatile tools oblique RFs.1 Accurate predictions.2 Intuitive design formula based interface. Extensive input checks informative error messages. Compatible tidymodels mlr3","code":""},{"path":"https://bcjaeger.github.io/aorsf/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Accelerated Oblique Random Forests","text":"can install aorsf CRAN using can install development version aorsf GitHub :","code":"install.packages(\"aorsf\") # install.packages(\"remotes\") remotes::install_github(\"ropensci/aorsf\")"},{"path":"https://bcjaeger.github.io/aorsf/index.html","id":"get-started","dir":"","previous_headings":"","what":"Get started","title":"Accelerated Oblique Random Forests","text":"aorsf fits several types oblique RFs orsf() function, including classification, regression, survival RFs. classification, fit oblique RF predict penguin species using penguin data magnificent palmerpenguins R package regression, use data predict bill length penguins: personal favorite oblique survival RF accelerated Cox regression first type oblique RF aorsf provided (see JCGS paper). , use predict mortality risk following diagnosis primary biliary cirrhosis:","code":"library(aorsf) library(tidyverse) # An oblique classification RF penguin_fit <- orsf(data = penguins_orsf, n_tree = 5, formula = species ~ .) penguin_fit #> ---------- Oblique random classification forest #> #> Linear combinations: Accelerated Logistic regression #> N observations: 333 #> N classes: 3 #> N trees: 5 #> N predictors total: 7 #> N predictors per node: 3 #> Average leaves per tree: 6 #> Min observations in leaf: 5 #> OOB stat value: 0.99 #> OOB stat type: AUC-ROC #> Variable importance: anova #> #> ----------------------------------------- # An oblique regression RF bill_fit <- orsf(data = penguins_orsf, n_tree = 5, formula = bill_length_mm ~ .) bill_fit #> ---------- Oblique random regression forest #> #> Linear combinations: Accelerated Linear regression #> N observations: 333 #> N trees: 5 #> N predictors total: 7 #> N predictors per node: 3 #> Average leaves per tree: 42.6 #> Min observations in leaf: 5 #> OOB stat value: 0.76 #> OOB stat type: RSQ #> Variable importance: anova #> #> ----------------------------------------- # An oblique survival RF pbc_fit <- orsf(data = pbc_orsf, n_tree = 5, formula = Surv(time, status) ~ . 
- id) pbc_fit #> ---------- Oblique random survival forest #> #> Linear combinations: Accelerated Cox regression #> N observations: 276 #> N events: 111 #> N trees: 5 #> N predictors total: 17 #> N predictors per node: 5 #> Average leaves per tree: 20.4 #> Min observations in leaf: 5 #> Min events in leaf: 1 #> OOB stat value: 0.79 #> OOB stat type: Harrell's C-index #> Variable importance: anova #> #> -----------------------------------------"},{"path":"https://bcjaeger.github.io/aorsf/index.html","id":"what-does-oblique-mean","dir":"","previous_headings":"","what":"What does “oblique” mean?","title":"Accelerated Oblique Random Forests","text":"Decision trees grown splitting set training data non-overlapping subsets, goal similarity within new subsets . subsets created single predictor, decision tree axis-based subset boundaries perpendicular axis predictor. linear combinations (.e., weighted sum) variables used instead single variable, tree oblique boundaries neither parallel perpendicular axis. Figure: Decision trees classification axis-based splitting (left) oblique splitting (right). Cases orange squares; controls purple circles. trees partition predictor space defined variables X1 X2, oblique splits better job separating two classes. , difference translate real data, impact random forests comprising hundreds axis-based oblique trees? demonstrate using penguin data.3 also use function make several plots: also use grid points plotting decision surfaces: use orsf mtry=1 fit axis-based trees: Next use orsf_update copy modify original model, expanding fit oblique tree using mtry=2 instead mtry=1, include 500 trees instead 1: now need visualize decision surfaces using predictions four fits: Figure: Axis-based oblique decision surfaces single tree ensemble 500 trees. Axis-based trees boundaries perpendicular predictor axes, whereas oblique trees can boundaries neither parallel perpendicular predictor axes. Axis-based forests tend ‘step-function’ decision boundaries, oblique forests tend smooth decision boundaries.","code":"plot_decision_surface <- function(predictions, title, grid){ # this is not a general function for plotting # decision surfaces. It just helps to minimize # copying and pasting of code. 
class_preds <- bind_cols(grid, predictions) %>% pivot_longer(cols = c(Adelie, Chinstrap, Gentoo)) %>% group_by(flipper_length_mm, bill_length_mm) %>% arrange(desc(value)) %>% slice(1) cols <- c(\"darkorange\", \"purple\", \"cyan4\") ggplot(class_preds, aes(bill_length_mm, flipper_length_mm)) + geom_contour_filled(aes(z = value, fill = name), alpha = .25) + geom_point(data = penguins_orsf, aes(color = species, shape = species), alpha = 0.5) + scale_color_manual(values = cols) + scale_fill_manual(values = cols) + labs(x = \"Bill length, mm\", y = \"Flipper length, mm\") + theme_minimal() + scale_x_continuous(expand = c(0,0)) + scale_y_continuous(expand = c(0,0)) + theme(panel.grid = element_blank(), panel.border = element_rect(fill = NA), legend.position = '') + labs(title = title) } grid <- expand_grid( flipper_length_mm = seq(min(penguins_orsf$flipper_length_mm), max(penguins_orsf$flipper_length_mm), len = 200), bill_length_mm = seq(min(penguins_orsf$bill_length_mm), max(penguins_orsf$bill_length_mm), len = 200) ) fit_axis_tree <- penguins_orsf %>% orsf(species ~ bill_length_mm + flipper_length_mm, n_tree = 1, mtry = 1, tree_seeds = 106760) fit_axis_forest <- fit_axis_tree %>% orsf_update(n_tree = 500) fit_oblique_tree <- fit_axis_tree %>% orsf_update(mtry = 2) fit_oblique_forest <- fit_oblique_tree %>% orsf_update(n_tree = 500) preds <- list(fit_axis_tree, fit_axis_forest, fit_oblique_tree, fit_oblique_forest) %>% map(predict, new_data = grid, pred_type = 'prob') titles <- c(\"Axis-based tree\", \"Axis-based forest\", \"Oblique tree\", \"Oblique forest\") plots <- map2(preds, titles, plot_decision_surface, grid = grid)"},{"path":"https://bcjaeger.github.io/aorsf/index.html","id":"variable-importance","dir":"","previous_headings":"","what":"Variable importance","title":"Accelerated Oblique Random Forests","text":"importance individual predictor variables can estimated three ways using aorsf can used type oblique RF. Also, variable importance functions always return named character vector negation2: variable assessed separately multiplying variable’s coefficients -1 determining much model’s performance changes. worse model’s performance negating coefficients given variable, important variable. technique promising b/c require permutation emphasizes variables larger coefficients linear combinations, also relatively new hasn’t studied much permutation importance. See Jaeger, (2023) details technique. permutation: variable assessed separately randomly permuting variable’s values determining much model’s performance changes. worse model’s performance permuting values given variable, important variable. technique flexible, intuitive, frequently used. also several known limitations analysis variance (ANOVA)4: p-value computed coefficient linear combination variables decision tree. Importance individual predictor variable proportion times p-value coefficient < 0.01. technique efficient computationally, may effective permutation negation terms selecting signal noise variables. See Menze, 2011 details technique. 
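The importance functions described above all return a named numeric vector sorted from most to least important, which makes downstream plotting straightforward. A minimal sketch, reusing the pbc_fit survival forest fitted earlier on this page; the reshaping and plotting choices are illustrative and not part of aorsf:

library(ggplot2)

# negation importance: a named numeric vector, sorted from most
# to least important variable
vi <- orsf_vi_negate(pbc_fit)

# reshape to a data frame so ggplot2 can use it
vi_df <- data.frame(variable = names(vi), importance = as.numeric(vi))

# dot plot with the most important variable at the top
ggplot(vi_df, aes(x = importance, y = reorder(variable, importance))) +
  geom_point() +
  labs(x = "Negation importance", y = NULL)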
can supply R function estimate --bag error (see oob vignette) estimate --bag variable importance (see orsf_vi examples)","code":"orsf_vi_negate(pbc_fit) #> bili copper stage sex age #> 0.1552460736 0.1156218837 0.0796917628 0.0533427094 0.0283132385 #> albumin trt chol alk.phos platelet #> 0.0279823814 0.0168238416 0.0153010749 0.0148718669 0.0094582765 #> edema ascites spiders protime hepato #> 0.0067975986 0.0065505801 0.0062356214 -0.0004653046 -0.0026664147 #> ast trig #> -0.0028902524 -0.0106616501 orsf_vi_permute(penguin_fit) #> bill_length_mm bill_depth_mm body_mass_g island #> 0.121351910 0.101846889 0.097822451 0.080772909 #> sex flipper_length_mm year #> 0.035053517 0.008270751 -0.008058339 orsf_vi_anova(bill_fit) #> species sex bill_depth_mm flipper_length_mm #> 0.51652893 0.27906977 0.06315789 0.04950495 #> body_mass_g island year #> 0.04807692 0.02687148 0.00000000"},{"path":"https://bcjaeger.github.io/aorsf/index.html","id":"partial-dependence-pd","dir":"","previous_headings":"","what":"Partial dependence (PD)","title":"Accelerated Oblique Random Forests","text":"Partial dependence (PD) shows expected prediction model function single predictor multiple predictors. expectation marginalized values predictors, giving something like multivariable adjusted estimate model’s prediction.. can use specific values predictor compute PD let aorsf pick reasonable values use pred_spec_auto(): summary function, orsf_summarize_uni(), computes PD many variables ask , using sensible values. PD, see vignette","code":"# pick your own values orsf_pd_oob(bill_fit, pred_spec = list(species = c(\"Adelie\", \"Gentoo\"))) #> species mean lwr medn upr #> #> 1: Adelie 39.99394 35.76532 39.80782 46.13931 #> 2: Gentoo 46.66565 40.02938 46.88517 51.61367 # let aorsf pick reasonable values for you: orsf_pd_oob(bill_fit, pred_spec = pred_spec_auto(bill_depth_mm, island)) #> bill_depth_mm island mean lwr medn upr #> #> 1: 14.3 Biscoe 43.94960 35.90421 45.30159 51.05109 #> 2: 15.6 Biscoe 44.24705 36.62759 45.57321 51.08020 #> 3: 17.3 Biscoe 44.84757 36.53804 45.62910 53.93833 #> 4: 18.7 Biscoe 45.08939 36.35893 46.16893 54.42075 #> 5: 19.5 Biscoe 45.13608 36.21033 46.08023 54.42075 #> --- #> 11: 14.3 Torgersen 43.55984 35.47143 44.18127 51.05109 #> 12: 15.6 Torgersen 43.77317 35.44683 44.28406 51.08020 #> 13: 17.3 Torgersen 44.56465 35.84585 44.83694 53.93833 #> 14: 18.7 Torgersen 44.68367 35.44010 44.86667 54.42075 #> 15: 19.5 Torgersen 44.64605 35.44010 44.86667 54.42075 orsf_summarize_uni(pbc_fit, n_variables = 2) #> #> -- bili (VI Rank: 1) ----------------------------- #> #> |----------------- Risk -----------------| #> Value Mean Median 25th % 75th % #> #> 0.60 0.2098108 0.07168855 0.01138461 0.2860450 #> 0.80 0.2117933 0.07692308 0.01709469 0.2884990 #> 1.40 0.2326560 0.08445419 0.02100837 0.3563622 #> 3.55 0.4265979 0.35820106 0.05128824 0.7342923 #> 7.30 0.4724608 0.44746241 0.11759259 0.8039683 #> #> -- copper (VI Rank: 2) --------------------------- #> #> |----------------- Risk -----------------| #> Value Mean Median 25th % 75th % #> #> 25.0 0.2332412 0.04425936 0.01587919 0.3888304 #> 42.5 0.2535448 0.07417582 0.01754386 0.4151786 #> 74.0 0.2825471 0.11111111 0.01988069 0.4770833 #> 130 0.3259604 0.18771003 0.04658385 0.5054348 #> 217 0.4213303 0.28571429 0.13345865 0.6859423 #> #> Predicted risk at time t = 1788 for top 2 predictors"},{"path":"https://bcjaeger.github.io/aorsf/index.html","id":"individual-conditional-expectations-ice","dir":"","previous_headings":"","what":"Individual conditional 
expectations (ICE)","title":"Accelerated Oblique Random Forests","text":"Unlike partial dependence, shows expected prediction function one multiple predictors, individual conditional expectations (ICE) show prediction individual observation function predictor. ICE, see vignette","code":""},{"path":"https://bcjaeger.github.io/aorsf/index.html","id":"interaction-scores","dir":"","previous_headings":"","what":"Interaction scores","title":"Accelerated Oblique Random Forests","text":"orsf_vint() function computes score possible interaction model based PD using method described Greenwell et al, 2018.5 can slow larger datasets, substantial speedups occur making use multi-threading restricting search smaller set predictors. values score mean? values average standard deviation standard deviation PD one variable conditional variable. interpreted relative one another, .e., higher scoring interaction likely reflect real interaction two variables lower scoring one. interaction scores make sense? Let’s test top scoring lowest scoring interactions using coxph(). Note: exploratory true null hypothesis test. ? used data generate test null hypothesis. much conducting statistical inference test interactions coxph demonstrating interaction scores orsf_vint() provides consistent tests models.","code":"preds_interaction <- c(\"albumin\", \"protime\", \"bili\", \"spiders\", \"trt\") # While it is tempting to speed up `orsf_vint()` by growing a smaller # number of trees, results may become unstable with this shortcut. pbc_interactions <- pbc_fit %>% orsf_update(n_tree = 500, tree_seeds = 329) %>% orsf_vint(n_thread = 0, predictors = preds_interaction) pbc_interactions #> interaction score #> #> 1: albumin..protime 0.97837184 #> 2: protime..bili 0.78999788 #> 3: albumin..bili 0.59128756 #> 4: bili..spiders 0.13192184 #> 5: bili..trt 0.13192184 #> 6: albumin..spiders 0.06578222 #> 7: albumin..trt 0.06578222 #> 8: protime..spiders 0.03012718 #> 9: protime..trt 0.03012718 #> 10: spiders..trt 0.00000000 library(survival) # the top scoring interaction should get a lower p-value anova(coxph(Surv(time, status) ~ protime * albumin, data = pbc_orsf)) #> Analysis of Deviance Table #> Cox model: response is Surv(time, status) #> Terms added sequentially (first to last) #> #> loglik Chisq Df Pr(>|Chi|) #> NULL -550.19 #> protime -538.51 23.353 1 1.349e-06 *** #> albumin -514.89 47.255 1 6.234e-12 *** #> protime:albumin -511.76 6.252 1 0.01241 * #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # the bottom scoring interaction should get a higher p-value anova(coxph(Surv(time, status) ~ spiders * trt, data = pbc_orsf)) #> Analysis of Deviance Table #> Cox model: response is Surv(time, status) #> Terms added sequentially (first to last) #> #> loglik Chisq Df Pr(>|Chi|) #> NULL -550.19 #> spiders -538.58 23.2159 1 1.448e-06 *** #> trt -538.39 0.3877 1 0.5335 #> spiders:trt -538.29 0.2066 1 0.6494 #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1"},{"path":"https://bcjaeger.github.io/aorsf/index.html","id":"comparison-to-existing-software","dir":"","previous_headings":"","what":"Comparison to existing software","title":"Accelerated Oblique Random Forests","text":"survival analysis, comparisons aorsf existing software presented JCGS paper. paper: describes aorsf detail summary procedures used tree fitting algorithm runs general benchmark comparing aorsf obliqueRSF several learners reports prediction accuracy computational efficiency learners. 
runs simulation study comparing variable importance techniques oblique survival RFs, axis based survival RFs, boosted trees. reports probability variable importance technique rank relevant variable higher importance irrelevant variable.","code":""},{"path":"https://bcjaeger.github.io/aorsf/index.html","id":"references","dir":"","previous_headings":"","what":"References","title":"Accelerated Oblique Random Forests","text":"Jaeger BC, Long DL, Long DM, Sims M, Szychowski JM, Min Y, Mcclure LA, Howard G, Simon N (2019). “Oblique random survival forests.” Annals Applied Statistics, 13(3). doi:10.1214/19-aoas1261 https://doi.org/10.1214/19-aoas1261. Jaeger BC, Welden S, Lenoir K, Speiser JL, Segar MW, Pandey , Pajewski NM (2023). “Accelerated interpretable oblique random survival forests.” Journal Computational Graphical Statistics, 1-16. doi:10.1080/10618600.2023.2231048 https://doi.org/10.1080/10618600.2023.2231048. Horst , Hill AP, Gorman KB (2020). palmerpenguins: Palmer Archipelago (Antarctica) penguin data. R package version 0.1.0, https://allisonhorst.github.io/palmerpenguins/. Menze, H B, Kelm, Michael B, Splitthoff, N D, Koethe, Ullrich, Hamprecht, F (2011). “oblique random forests.” Machine Learning Knowledge Discovery Databases: European Conference, ECML PKDD 2011, Athens, Greece, September 5-9, 2011, Proceedings, Part II 22, 453-469. Springer. Greenwell, M B, Boehmke, C B, McCarthy, J (2018). “simple effective model-based variable importance measure.” arXiv preprint arXiv:1805.04755.","code":""},{"path":"https://bcjaeger.github.io/aorsf/index.html","id":"funding","dir":"","previous_headings":"","what":"Funding","title":"Accelerated Oblique Random Forests","text":"developers aorsf received financial support Center Biomedical Informatics, Wake Forest University School Medicine. also received support National Center Advancing Translational Sciences National Institutes Health Award Number UL1TR001420. content solely responsibility authors necessarily represent official views National Institutes Health.","code":""},{"path":"https://bcjaeger.github.io/aorsf/reference/aorsf-package.html","id":null,"dir":"Reference","previous_headings":"","what":"aorsf: Accelerated Oblique Random Forests — aorsf-package","title":"aorsf: Accelerated Oblique Random Forests — aorsf-package","text":"Fit, interpret, compute predictions oblique random forests. Includes support partial dependence, variable importance, passing customized functions variable importance identification linear combinations features. 
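Since the package description above covers fitting, interpretation, and prediction, a minimal prediction sketch follows, reusing the pbc_fit survival forest fitted earlier on this page; the assumption here is that predict() for survival forests accepts a pred_horizon argument mirroring oobag_pred_horizon in orsf():

# predicted risk at roughly five years for the first few rows
predict(pbc_fit,
        new_data = pbc_orsf[1:5, ],
        pred_type = 'risk',
        pred_horizon = 1826.25)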
Methods oblique random survival forest described Jaeger et al., (2023) doi:10.1080/10618600.2023.2231048 .","code":""},{"path":[]},{"path":"https://bcjaeger.github.io/aorsf/reference/aorsf-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"aorsf: Accelerated Oblique Random Forests — aorsf-package","text":"Maintainer: Byron Jaeger bjaeger@wakehealth.edu (ORCID) contributors: Nicholas Pajewski [contributor] Sawyer Welden swelden@wakehealth.edu [contributor] Christopher Jackson chris.jackson@mrc-bsu.cam.ac.uk [reviewer] Marvin Wright [reviewer] Lukas Burk [reviewer]","code":""},{"path":"https://bcjaeger.github.io/aorsf/reference/as.data.table.orsf_summary_uni.html","id":null,"dir":"Reference","previous_headings":"","what":"Coerce to data.table — as.data.table.orsf_summary_uni","title":"Coerce to data.table — as.data.table.orsf_summary_uni","text":"Convert 'orsf_summary' object data.table object.","code":""},{"path":"https://bcjaeger.github.io/aorsf/reference/as.data.table.orsf_summary_uni.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Coerce to data.table — as.data.table.orsf_summary_uni","text":"","code":"# S3 method for orsf_summary_uni as.data.table(x, ...)"},{"path":"https://bcjaeger.github.io/aorsf/reference/as.data.table.orsf_summary_uni.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Coerce to data.table — as.data.table.orsf_summary_uni","text":"x object class 'orsf_summary_uni' ... used","code":""},{"path":"https://bcjaeger.github.io/aorsf/reference/as.data.table.orsf_summary_uni.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Coerce to data.table — as.data.table.orsf_summary_uni","text":"data.table","code":""},{"path":"https://bcjaeger.github.io/aorsf/reference/as.data.table.orsf_summary_uni.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Coerce to data.table — as.data.table.orsf_summary_uni","text":"","code":"if (FALSE) { library(data.table) object <- orsf(pbc_orsf, Surv(time, status) ~ . - id, n_tree = 25) smry <- orsf_summarize_uni(object, n_variables = 2) as.data.table(smry) }"},{"path":"https://bcjaeger.github.io/aorsf/reference/orsf.html","id":null,"dir":"Reference","previous_headings":"","what":"Oblique Random Forests — orsf","title":"Oblique Random Forests — orsf","text":"Grow specify oblique random forest. name orsf() implies function works survival forests, can used classification, regression, survival forests.","code":""},{"path":"https://bcjaeger.github.io/aorsf/reference/orsf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Oblique Random Forests — orsf","text":"","code":"orsf( data, formula, control = NULL, weights = NULL, n_tree = 500, n_split = 5, n_retry = 3, n_thread = 0, mtry = NULL, sample_with_replacement = TRUE, sample_fraction = 0.632, leaf_min_events = 1, leaf_min_obs = 5, split_rule = NULL, split_min_events = 5, split_min_obs = 10, split_min_stat = NULL, oobag_pred_type = NULL, oobag_pred_horizon = NULL, oobag_eval_every = NULL, oobag_fun = NULL, importance = \"anova\", importance_max_pvalue = 0.01, group_factors = TRUE, tree_seeds = NULL, attach_data = TRUE, no_fit = FALSE, na_action = \"fail\", verbose_progress = FALSE, ... 
) orsf_train(object, attach_data = TRUE)"},{"path":"https://bcjaeger.github.io/aorsf/reference/orsf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Oblique Random Forests — orsf","text":"data data.frame, tibble, data.table contains relevant variables. formula (formula) Two sided formula single outcome. terms right names predictor variables, symbol '.' may used indicate variables data except response. symbol '-' may also used indicate removal predictor. Details response vary depending forest type: Classification: response single variable, variable type factor data. Regression: response single variable, variable typee double integer least 10 unique numeric values data. Survival: response include time variable, followed status variable, may written inside call Surv (see examples). control (orsf_control) object returned one orsf_control functions: orsf_control_survival, orsf_control_classification, orsf_control_regression. NULL (default) use accelerated control, fastest available option. survival classification, Cox Logistic regression 1 iteration, regression ordinary least squares. weights (numeric vector) Optional. given, input length equal nrow(data) complete imputed data length equal nrow(na.omit(data)) na_action \"omit\". weights vector used count observations events prior growing node tree, orsf() scales weights sum(weights) == nrow(data). helps make tree depth consistent weighted un-weighted fits. n_tree (integer) number trees grow. Default n_tree = 500. n_split (integer) number cut-points assessed splitting node decision trees. Default n_split = 5. n_retry (integer) node splittable, current linear combination inputs unable provide valid split, orsf try new linear combination based different set randomly selected predictors, n_retry times. Default n_retry = 3. Set n_retry = 0 prevent retries. n_thread (integer) number threads use growing trees, computing predictions, computing importance. Default 0, allows suitable number threads used based availability. mtry (integer) Number predictors randomly included candidates splitting node. default smallest integer greater square root number total predictors, .e., mtry = ceiling(sqrt(number predictors)) sample_with_replacement (logical) TRUE (default), observations sampled replacement -bag sample created decision tree. FALSE, observations sampled without replacement tree -bag sample containing sample_fraction% original sample. sample_fraction (double) proportion observations trees' -bag sample contain, relative number rows data. used sample_with_replacement FALSE. Default value 0.632. leaf_min_events (integer) input relevant survival analysis, specifies minimum number events leaf node. Default leaf_min_events = 1 leaf_min_obs (integer) minimum number observations leaf node. Default leaf_min_obs = 5. split_rule (character) assess quality potential splitting rule node. Valid options survival : 'logrank' : log-rank test statistic (default). 'cstat' : Harrell's concordance statistic. classification, valid options : 'gini' : gini impurity (default) 'cstat' : area underneath ROC curve (AUC-ROC) regression, valid options : 'variance' : variance reduction (default) split_min_events (integer) minimum number events required node consider splitting . Default split_min_events = 5. input relevant survival trees. split_min_obs (integer) minimum number observations required node consider splitting . Default split_min_obs = 10. split_min_stat (double) minimum test statistic required split node. 
splits found statistic exceeding split_min_stat, given node either becomes leaf retry occurs (n_retry retries). Defaults 3.84 split_rule = 'logrank' 0.55 split_rule = 'cstat' (see first note ) 0.00 split_rule = 'gini' (see second note ) 0.00 split_rule = 'variance' Note 1 C-statistic splitting, C < 0.50, consider statistic value 1 - C allow good 'anti-predictive' splits. , C-statistic initially computed 0.1, considered 1 - 0.10 = 0.90. Note 2 Gini impurity, value 0 1 usually indicate best worst possible scores, respectively. make things simple avoid introducing split_max_stat input, flip values Gini impurity 1 0 indicate best worst possible scores, respectively. oobag_pred_type (character) type --bag predictions compute fitting ensemble. Valid options tree type: 'none' : compute --bag predictions 'leaf' : ID predicted leaf returned tree Valid options survival: 'risk' : probability event occurring oobag_pred_horizon (default). 'surv' : 1 - risk. 'chf' : cumulative hazard function oobag_pred_horizon. 'mort' : mortality, .e., number events expected observations training data identical given observation. Valid options classification: 'prob' : probability class (default) 'class' : class (.e., .max(prob)) Valid options regression: 'mean' : mean value (default) oobag_pred_horizon (numeric) numeric value indicating time used --bag predictions. Default median observed times, .e., oobag_pred_horizon = median(time). input relevant survival trees prediction type 'risk', 'surv', 'chf'. oobag_eval_every (integer) --bag performance ensemble checked every oobag_eval_every trees. , oobag_eval_every = 10, --bag performance checked growing 10th tree, 20th tree, . Default oobag_eval_every = n_tree. oobag_fun (function) used evaluating --bag prediction accuracy every oobag_eval_every trees. oobag_fun = NULL (default), evaluation statistic selected based tree type survival: Harrell's C-statistic (1982) classification: Area underneath ROC curve (AUC-ROC) regression: Traditional prediction R-squared use oobag_fun note following: oobag_fun three inputs: y_mat, w_vec, s_vec survival trees, y_mat two column matrix first column named 'time' second named 'status'. classification trees, y_mat matrix number columns = number distinct classes outcome. regression, y_mat matrix one column. s_vec numeric vector containing predictions oobag_fun return numeric output length 1 details, see --bag vignette. importance (character) Indicate method variable importance: 'none': variable importance computed. 'anova': compute analysis variance (ANOVA) importance 'negate': compute negation importance 'permute': compute permutation importance details methods, see orsf_vi. importance_max_pvalue (double) relevant importance \"anova\". maximum p-value register positive case counting number times variable found 'significant' tree growth. Default 0.01, recommended Menze et al. group_factors (logical) relevant variable importance estimated. TRUE, importance factor variables reported overall aggregating importance individual levels factor. FALSE, importance individual factor levels returned. tree_seeds (integer vector) Optional. specified, random seeds set using values tree_seeds[] growing tree . Two forests grown number trees seeds exact --bag samples, making --bag error estimates forests comparable. NULL (default), seeds picked random. attach_data (logical) TRUE, copy training data attached output. required plan using functions like orsf_pd_oob orsf_summarize_uni interpret forest using training data. Default TRUE. 
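Given the oobag_fun contract described above (three arguments, y_mat, w_vec, and s_vec, returning a single numeric value), a minimal sketch of a user-supplied function follows. The function is a toy placeholder that ignores censoring, so treat it as an illustration of the required signature rather than a recommended accuracy metric; oobag_toy and fit_custom_oob are names introduced only for this example:

# y_mat: two-column matrix with 'time' and 'status' (survival forests)
# w_vec: case weights
# s_vec: out-of-bag predictions (risk, by default)
oobag_toy <- function(y_mat, w_vec, s_vec){
  # weighted mean squared difference between status and predicted risk;
  # ignores censoring, shown only to illustrate the interface
  weighted.mean((y_mat[, 'status'] - s_vec)^2, w = w_vec)
}

fit_custom_oob <- orsf(pbc_orsf,
                       formula = Surv(time, status) ~ . - id,
                       n_tree = 25,
                       oobag_fun = oobag_toy)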
no_fit (logical) TRUE, model fitting steps defined saved, training initiated. object returned can directly submitted orsf_train() long attach_data TRUE. na_action (character) happen data contains missing values (.e., NA values). Valid options : 'fail' : error thrown data contains NA values 'omit' : rows data incomplete data dropped 'impute_meanmode' : missing values continuous categorical variables data imputed using mean mode, respectively. verbose_progress (logical) TRUE, progress messages printed console. FALSE (default), nothing printed. ... arguments passed methods (currently used). object untrained 'aorsf' object, created setting no_fit = TRUE orsf().","code":""},{"path":"https://bcjaeger.github.io/aorsf/reference/orsf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Oblique Random Forests — orsf","text":"obliqueForest object","code":""},{"path":"https://bcjaeger.github.io/aorsf/reference/orsf.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Oblique Random Forests — orsf","text":"function called orf()? earlier versions, aorsf package exclusively oblique random survival forests. formula survival oblique RFs: response formula can survival object returned Surv function, can also just time status variables. .e., Surv(time, status) ~ . works time + status ~ . works response can also survival object stored data. example, y ~ . valid formula data$y inherits Surv class. mtry: mtry parameter may temporarily reduced ensure linear models used find combinations predictors remain stable. occurs coefficients linear model fitting algorithms may become infinite number predictors exceeds number observations. oobag_fun: oobag_fun specified, used compute negation importance permutation importance, role ANOVA importance. n_thread: R function called C++ (.e., user-supplied function compute --bag error identify linear combinations variables), n_thread automatically set 1 attempting run R functions multiple threads cause R session crash.","code":""},{"path":"https://bcjaeger.github.io/aorsf/reference/orsf.html","id":"what-is-an-oblique-decision-tree-","dir":"Reference","previous_headings":"","what":"What is an oblique decision tree?","title":"Oblique Random Forests — orsf","text":"Decision trees developed splitting set training data two new subsets, goal similarity within new subsets . splitting process repeated resulting subsets data stopping criterion met. new subsets data formed based single predictor, decision tree said axis-based splits data appear perpendicular axis predictor. linear combinations variables used instead single variable, tree oblique splits data neither parallel right angle axis Figure : Decision trees classification axis-based splitting (left) oblique splitting (right). Cases orange squares; controls purple circles. trees partition predictor space defined variables X1 X2, oblique splits better job separating two classes.","code":""},{"path":"https://bcjaeger.github.io/aorsf/reference/orsf.html","id":"what-is-a-random-forest-","dir":"Reference","previous_headings":"","what":"What is a random forest?","title":"Oblique Random Forests — orsf","text":"Random forests collections de-correlated decision trees. Predictions tree aggregated make ensemble prediction forest. 
details, see Breiman el, 2001.","code":""},{"path":"https://bcjaeger.github.io/aorsf/reference/orsf.html","id":"training-out-of-bag-error-and-testing","dir":"Reference","previous_headings":"","what":"Training, out-of-bag error, and testing","title":"Oblique Random Forests — orsf","text":"random forests, tree grown bootstrapped version training set. bootstrap samples selected replacement, bootstrapped training set contains two-thirds instances original training set. '--bag' data instances bootstrapped training set. tree random forest can make predictions --bag data, --bag predictions can aggregated make ensemble --bag prediction. Since --bag data used grow tree, accuracy ensemble --bag predictions approximate generalization error random forest. Generalization error refers error random forest's predictions applied predict outcomes data used train , .e., testing data.","code":""},{"path":"https://bcjaeger.github.io/aorsf/reference/orsf.html","id":"examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Oblique Random Forests — orsf","text":"orsf() entry-point aorsf package. can used fit classification, regression, survival forests. classification, fit oblique RF predict penguin species using penguin data magnificent palmerpenguins R package regression, use data predict bill length penguins: personal favorite oblique survival RF accelerated Cox regression first type oblique RF aorsf provided (see ArXiv paper; paper also published Journal Computational Graphical Statistics publicly available ). , use predict mortality risk following diagnosis primary biliary cirrhosis:","code":"library(aorsf) library(magrittr) # for %>% ## ## Attaching package: 'magrittr' ## The following object is masked from 'package:tidyr': ## ## extract ## The following objects are masked from 'package:testthat': ## ## equals, is_less_than, not # An oblique classification RF penguin_fit <- orsf(data = penguins_orsf, n_tree = 5, formula = species ~ .) penguin_fit ## ---------- Oblique random classification forest ## ## Linear combinations: Accelerated Logistic regression ## N observations: 333 ## N classes: 3 ## N trees: 5 ## N predictors total: 7 ## N predictors per node: 3 ## Average leaves per tree: 4.6 ## Min observations in leaf: 5 ## OOB stat value: 0.99 ## OOB stat type: AUC-ROC ## Variable importance: anova ## ## ----------------------------------------- # An oblique regression RF bill_fit <- orsf(data = penguins_orsf, n_tree = 5, formula = bill_length_mm ~ .) bill_fit ## ---------- Oblique random regression forest ## ## Linear combinations: Accelerated Linear regression ## N observations: 333 ## N trees: 5 ## N predictors total: 7 ## N predictors per node: 3 ## Average leaves per tree: 51 ## Min observations in leaf: 5 ## OOB stat value: 0.70 ## OOB stat type: RSQ ## Variable importance: anova ## ## ----------------------------------------- # An oblique survival RF pbc_fit <- orsf(data = pbc_orsf, n_tree = 5, formula = Surv(time, status) ~ . 
- id) pbc_fit ## ---------- Oblique random survival forest ## ## Linear combinations: Accelerated Cox regression ## N observations: 276 ## N events: 111 ## N trees: 5 ## N predictors total: 17 ## N predictors per node: 5 ## Average leaves per tree: 22.2 ## Min observations in leaf: 5 ## Min events in leaf: 1 ## OOB stat value: 0.78 ## OOB stat type: Harrell's C-index ## Variable importance: anova ## ## -----------------------------------------"},{"path":"https://bcjaeger.github.io/aorsf/reference/orsf.html","id":"more-than-one-way-to-grow-a-forest","dir":"Reference","previous_headings":"","what":"More than one way to grow a forest","title":"Oblique Random Forests — orsf","text":"can use orsf(no_fit = TRUE) make specification grow forest instead fitted forest. ? Two reasons: computational tasks, may want check long take fit forest commit : fitting multiple forests, use blueprint along orsf_train() orsf_update() simplify code:","code":"orsf_spec <- orsf(pbc_orsf, formula = time + status ~ . - id, no_fit = TRUE) orsf_spec ## Untrained oblique random survival forest ## ## Linear combinations: Accelerated Cox regression ## N observations: 276 ## N events: 111 ## N trees: 500 ## N predictors total: 17 ## N predictors per node: 5 ## Average leaves per tree: 0 ## Min observations in leaf: 5 ## Min events in leaf: 1 ## OOB stat value: none ## OOB stat type: Harrell's C-index ## Variable importance: anova ## ## ----------------------------------------- orsf_spec %>% orsf_update(n_tree = 10000) %>% orsf_time_to_train() ## Time difference of 2.429678 secs orsf_fit <- orsf_train(orsf_spec) orsf_fit_10 <- orsf_update(orsf_fit, leaf_min_obs = 10) orsf_fit_20 <- orsf_update(orsf_fit, leaf_min_obs = 20) orsf_fit$leaf_min_obs ## [1] 5 orsf_fit_10$leaf_min_obs ## [1] 10 orsf_fit_20$leaf_min_obs ## [1] 20"},{"path":"https://bcjaeger.github.io/aorsf/reference/orsf.html","id":"tidymodels","dir":"Reference","previous_headings":"","what":"tidymodels","title":"Oblique Random Forests — orsf","text":"tidymodels includes support aorsf computational engine: Prediction aorsf models different times also supported:","code":"library(tidymodels) library(censored) library(yardstick) pbc_tidy <- pbc_orsf %>% mutate(event_time = Surv(time, status), .before = 1) %>% select(-c(id, time, status)) %>% as_tibble() split <- initial_split(pbc_tidy) orsf_spec <- rand_forest() %>% set_engine(\"aorsf\") %>% set_mode(\"censored regression\") orsf_fit <- fit(orsf_spec, formula = event_time ~ ., data = training(split)) time_points <- seq(500, 3000, by = 500) test_pred <- augment(orsf_fit, new_data = testing(split), eval_time = time_points) brier_scores <- test_pred %>% brier_survival(truth = event_time, .pred) brier_scores ## # A tibble: 6 x 4 ## .metric .estimator .eval_time .estimate ## ## 1 brier_survival standard 500 0.0597 ## 2 brier_survival standard 1000 0.0943 ## 3 brier_survival standard 1500 0.0883 ## 4 brier_survival standard 2000 0.102 ## 5 brier_survival standard 2500 0.137 ## 6 brier_survival standard 3000 0.153 roc_scores <- test_pred %>% roc_auc_survival(truth = event_time, .pred) roc_scores ## # A tibble: 6 x 4 ## .metric .estimator .eval_time .estimate ##