diff --git a/404.html b/404.html index 5497a35..0d2e8ae 100644 --- a/404.html +++ b/404.html @@ -41,6 +41,7 @@ diff --git a/LICENSE-text.html b/LICENSE-text.html index f75bf9a..37a655a 100644 --- a/LICENSE-text.html +++ b/LICENSE-text.html @@ -26,6 +26,7 @@ diff --git a/LICENSE.html b/LICENSE.html index fd08b47..fc5abc2 100644 --- a/LICENSE.html +++ b/LICENSE.html @@ -26,6 +26,7 @@ diff --git a/articles/Bayesian-Supervised-Learning.html b/articles/Bayesian-Supervised-Learning.html index 0c41fe3..b937c5e 100644 --- a/articles/Bayesian-Supervised-Learning.html +++ b/articles/Bayesian-Supervised-Learning.html @@ -43,6 +43,7 @@ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot-1.png b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot-1.png index 8f68e4f..8250ad3 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot-1.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot-1.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot-2.png b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot-2.png index bfd488a..d65f547 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot-2.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot-2.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm-1.png b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm-1.png index edc19c8..96d457e 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm-1.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm-1.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm-2.png b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm-2.png index a3c1f77..6870e99 100644 
Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm-2.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm-2.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm_rfx-1.png b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm_rfx-1.png index 3fab971..19f1e15 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm_rfx-1.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm_rfx-1.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm_rfx-2.png b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm_rfx-2.png index 382c904..a5541b9 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm_rfx-2.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_root_plot_plm_rfx-2.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot-1.png b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot-1.png index 55be3bd..539d608 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot-1.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot-1.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot-2.png b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot-2.png index ece060e..2193f40 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot-2.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot-2.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm-1.png 
b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm-1.png index 2a3ccbb..aeacfac 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm-1.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm-1.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm-2.png b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm-2.png index 82006e1..dcecea0 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm-2.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm-2.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm_rfx-1.png b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm_rfx-1.png index 9d24da9..bbff898 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm_rfx-1.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm_rfx-1.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm_rfx-2.png b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm_rfx-2.png index 4e60ed4..7b5d4e5 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm_rfx-2.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/bart_warmstart_plot_plm_rfx-2.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot-1.png b/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot-1.png index 9e9736f..1232b26 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot-1.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot-1.png differ diff --git 
a/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot-2.png b/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot-2.png index 109455e..bf2dae0 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot-2.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot-2.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm-1.png b/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm-1.png index a400be8..6ca3532 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm-1.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm-1.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm-2.png b/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm-2.png index a277cc3..f905ec5 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm-2.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm-2.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm_rfx-1.png b/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm_rfx-1.png index 46bfdc1..0915044 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm_rfx-1.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm_rfx-1.png differ diff --git a/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm_rfx-2.png b/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm_rfx-2.png index b5ee23f..9ca7d05 100644 Binary files a/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm_rfx-2.png and b/articles/Bayesian-Supervised-Learning_files/figure-html/xbart_plot_plm_rfx-2.png differ diff --git a/articles/Causal-Inference.html b/articles/Causal-Inference.html index 
6e701b1..0c8a906 100644 --- a/articles/Causal-Inference.html +++ b/articles/Causal-Inference.html @@ -43,6 +43,7 @@ @@ -193,7 +194,7 @@

Warmstart(test_ub >= tau_x[test_inds]) ) mean(cover) -#> [1] 0.97 +#> [1] 0.98

BART MCMC without Warmstart @@ -240,7 +241,7 @@

BART MCMC without Warmstart(test_ub >= tau_x[test_inds]) ) mean(cover) -#> [1] 0.95

+#> [1] 0.96 @@ -351,7 +352,7 @@

Warmstart(test_ub >= tau_x[test_inds]) ) mean(cover) -#> [1] 0.64 +#> [1] 0.9

BART MCMC without Warmstart @@ -398,7 +399,7 @@

BART MCMC without Warmstart(test_ub >= tau_x[test_inds]) ) mean(cover) -#> [1] 0.87

+#> [1] 0.72 @@ -670,7 +671,7 @@

Warmstart(test_ub >= tau_x[test_inds]) ) mean(cover) -#> [1] 1 +#> [1] 0.995

BART MCMC without Warmstart @@ -717,7 +718,7 @@

BART MCMC without Warmstart(test_ub >= tau_x[test_inds]) ) mean(cover) -#> [1] 0.97

+#> [1] 0.98 diff --git a/articles/Causal-Inference_files/figure-html/bart_root_plot-1.png b/articles/Causal-Inference_files/figure-html/bart_root_plot-1.png index 1b9f55f..80bd7bf 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_root_plot-1.png and b/articles/Causal-Inference_files/figure-html/bart_root_plot-1.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_root_plot-2.png b/articles/Causal-Inference_files/figure-html/bart_root_plot-2.png index d07f337..123e7c6 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_root_plot-2.png and b/articles/Causal-Inference_files/figure-html/bart_root_plot-2.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_root_plot-3.png b/articles/Causal-Inference_files/figure-html/bart_root_plot-3.png index 37986f7..05cd619 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_root_plot-3.png and b/articles/Causal-Inference_files/figure-html/bart_root_plot-3.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_root_plot_2-1.png b/articles/Causal-Inference_files/figure-html/bart_root_plot_2-1.png index 029a8b0..e25d156 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_root_plot_2-1.png and b/articles/Causal-Inference_files/figure-html/bart_root_plot_2-1.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_root_plot_2-2.png b/articles/Causal-Inference_files/figure-html/bart_root_plot_2-2.png index 03f62c9..1ec6e0a 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_root_plot_2-2.png and b/articles/Causal-Inference_files/figure-html/bart_root_plot_2-2.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_root_plot_2-3.png b/articles/Causal-Inference_files/figure-html/bart_root_plot_2-3.png index a67d3cd..517c08e 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_root_plot_2-3.png and 
b/articles/Causal-Inference_files/figure-html/bart_root_plot_2-3.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_root_plot_3-1.png b/articles/Causal-Inference_files/figure-html/bart_root_plot_3-1.png index 9f73af1..d0a22a4 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_root_plot_3-1.png and b/articles/Causal-Inference_files/figure-html/bart_root_plot_3-1.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_root_plot_3-2.png b/articles/Causal-Inference_files/figure-html/bart_root_plot_3-2.png index f397c10..701f0eb 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_root_plot_3-2.png and b/articles/Causal-Inference_files/figure-html/bart_root_plot_3-2.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_root_plot_3-3.png b/articles/Causal-Inference_files/figure-html/bart_root_plot_3-3.png index a9a8c29..897766e 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_root_plot_3-3.png and b/articles/Causal-Inference_files/figure-html/bart_root_plot_3-3.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_root_plot_4-1.png b/articles/Causal-Inference_files/figure-html/bart_root_plot_4-1.png index d55b3d6..0e92fdd 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_root_plot_4-1.png and b/articles/Causal-Inference_files/figure-html/bart_root_plot_4-1.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_root_plot_4-2.png b/articles/Causal-Inference_files/figure-html/bart_root_plot_4-2.png index 1fb6166..6421342 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_root_plot_4-2.png and b/articles/Causal-Inference_files/figure-html/bart_root_plot_4-2.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_root_plot_4-3.png b/articles/Causal-Inference_files/figure-html/bart_root_plot_4-3.png index a541a86..95c6b73 100644 Binary files 
a/articles/Causal-Inference_files/figure-html/bart_root_plot_4-3.png and b/articles/Causal-Inference_files/figure-html/bart_root_plot_4-3.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot-1.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot-1.png index e6c8deb..12db7a9 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot-1.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot-1.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot-2.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot-2.png index b2304b6..35d72f1 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot-2.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot-2.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot-3.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot-3.png index 093d842..1d9702a 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot-3.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot-3.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_2-1.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_2-1.png index 0273673..90bc0b9 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_2-1.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_2-1.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_2-2.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_2-2.png index e7990eb..243e26f 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_2-2.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_2-2.png differ diff --git 
a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_2-3.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_2-3.png index 4cac50d..6f404d6 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_2-3.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_2-3.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_3-1.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_3-1.png index 017fc15..bc7d7a7 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_3-1.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_3-1.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_3-2.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_3-2.png index b653173..6cec356 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_3-2.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_3-2.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_3-3.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_3-3.png index deca823..6e172cf 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_3-3.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_3-3.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_4-1.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_4-1.png index 10b3f9e..b04d796 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_4-1.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_4-1.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_4-2.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_4-2.png index 5f610b2..43a12e5 100644 Binary files 
a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_4-2.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_4-2.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_4-3.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_4-3.png index 465c6c8..b77d0bc 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_4-3.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_4-3.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-1.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-1.png index 2f85270..4792d6a 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-1.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-1.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-2.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-2.png index 6f0ede4..d65e8b2 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-2.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-2.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-3.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-3.png index 90b3a0a..2706ce4 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-3.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-3.png differ diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-4.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-4.png index fc15134..79b1c7c 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-4.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-4.png differ 
diff --git a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-5.png b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-5.png index 35e3ef7..1760f65 100644 Binary files a/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-5.png and b/articles/Causal-Inference_files/figure-html/bart_warmstart_plot_rfx-5.png differ diff --git a/articles/Ensemble-Kernel.html b/articles/Ensemble-Kernel.html new file mode 100644 index 0000000..d024a68 --- /dev/null +++ b/articles/Ensemble-Kernel.html @@ -0,0 +1,295 @@ + + + + + + + + +Kernel Methods from Tree Ensembles in StochTree • stochtree + + + + + + + + + + Skip to contents + + +
+ + + + +
+
+ + + +
+

Motivation +

+

A trained tree ensemble with strong out-of-sample performance admits +a natural notion of “distance” between two samples: shared leaf +membership. We number the leaves in an ensemble from 1 to \(s\) (that is, if tree 1 has 3 leaves, it +reserves the numbers 1 - 3, and in turn if tree 2 has 5 leaves, it +reserves the numbers 4 - 8 to label its leaves, and so on). For a +dataset with \(n\) observations and an ensemble of \(m\) trees, we +construct the matrix \(W\) as +follows:

+
   Initialize \(W\) as a matrix of all zeroes with \(n\) rows and as many columns as leaves in +the ensemble
+   Let s = 0
+   FOR \(j\) +IN \(\left\{1,\dots,m\right\}\):
+      Let num_leaves be the number of leaves in tree \(j\)
+      FOR \(i\) +IN \(\left\{1,\dots,n\right\}\):
+         Let k be the leaf to which tree \(j\) maps observation \(i\)
+         Set element \(W_{i,k+s} = +1\)
+      Let s = s + num_leaves +
+

This sparse matrix \(W\) is a matrix +representation of the basis predictions of an ensemble (i.e., integrating +out the leaf parameters and just analyzing the leaf indices). For an +ensemble with \(m\) trees, we can +determine the proportion of trees that map each pair of observations to the same +leaf by computing \(W W^T / m\), whose \((i, i')\) entry is that proportion for observations \(i\) and \(i'\). This +can form the basis for a kernel function used in a Gaussian process +regression, as we demonstrate below.

+

To begin, load the stochtree package and the +tgp package which will serve as a point of reference.

+ +
+
+

Demo 1: Univariate Supervised Learning +

+

We begin with a simulated example from the tgp package +(Gramacy and Taddy (2010)). This data +generating process (DGP) is non-stationary with a single numeric +covariate. We define a training set and test set and evaluate various +approaches to modeling the out-of-sample outcome data.

+
+

Traditional Gaussian Process +

+

We can use the tgp package to model this data with a +classical Gaussian Process.

+
+# Generate the data
+X_train <- seq(0,20,length=100)
+X_test <- seq(0,20,length=99)
+y_train <- (sin(pi*X_train/5) + 0.2*cos(4*pi*X_train/5)) * (X_train <= 9.6)
+lin_train <- X_train>9.6; 
+y_train[lin_train] <- -1 + X_train[lin_train]/10
+y_train <- y_train + rnorm(length(y_train), sd=0.1)
+y_test <- (sin(pi*X_test/5) + 0.2*cos(4*pi*X_test/5)) * (X_test <= 9.6)
+lin_test <- X_test>9.6; 
+y_test[lin_test] <- -1 + X_test[lin_test]/10
+
+# Fit the GP
+model_gp <- bgp(X=X_train, Z=y_train, XX=X_test)
+plot(model_gp$ZZ.mean, y_test, xlab = "predicted", ylab = "actual", main = "Gaussian process")
+abline(0,1,lwd=2.5,lty=3,col="red")
+

+
+sqrt(mean((model_gp$ZZ.mean - y_test)^2))
+

Assess the RMSE

+
+sqrt(mean((model_gp$ZZ.mean - y_test)^2))
+#> [1] 0.0466081
+
+
+

BART-based Gaussian process +

+
+# Run BART on the data
+num_trees <- 200
+sigma_leaf <- 1/num_trees
+bart_model <- bart(X_train=X_train, y_train=y_train, X_test=X_test, num_trees=num_trees)
+
+# Extract kernels needed for kriging
+result_kernels <- computeForestKernels(bart_model=bart_model, X_train=X_train, X_test=X_test)
+Sigma_11 <- result_kernels$kernel_test
+Sigma_12 <- result_kernels$kernel_test_train
+Sigma_22 <- result_kernels$kernel_train
+Sigma_22_inv <- ginv(Sigma_22)
+Sigma_21 <- t(Sigma_12)
+
+# Compute mean and covariance for the test set posterior
+mu_tilde <- Sigma_12 %*% Sigma_22_inv %*% y_train
+Sigma_tilde <- (sigma_leaf)*(Sigma_11 - Sigma_12 %*% Sigma_22_inv %*% Sigma_21)
+
+# Sample from f(X_test) | X_test, X_train, f(X_train)
+gp_samples <- mvtnorm::rmvnorm(1000, mean = mu_tilde, sigma = Sigma_tilde)
+
+# Compute posterior mean predictions for f(X_test)
+yhat_mean_test <- colMeans(gp_samples)
+plot(yhat_mean_test, y_test, xlab = "predicted", ylab = "actual", main = "BART Gaussian process")
+abline(0,1,lwd=2.5,lty=3,col="red")
+

+

Assess the RMSE

+
+sqrt(mean((yhat_mean_test - y_test)^2))
+#> [1] 0.09765312
+
+
+
+

Demo 2: Multivariate Supervised Learning +

+

We proceed to the simulated “Friedman” dataset, as implemented in +tgp.

+
+

Traditional Gaussian Process +

+

We can use the tgp package to model this data with a +classical Gaussian Process.

+
+# Generate the data, add many "noise variables"
+n <- 100
+friedman.df <- friedman.1.data(n=n)
+train_inds <- sort(sample(1:n, floor(0.8*n), replace = F))
+test_inds <- (1:n)[!((1:n) %in% train_inds)]
+X <- as.matrix(friedman.df)[,1:10]
+X <- cbind(X, matrix(runif(n*10), ncol = 10))
+y <- as.matrix(friedman.df)[,12] + rnorm(n,0,1)*(sd(as.matrix(friedman.df)[,11])/2)
+X_train <- X[train_inds,]
+X_test <- X[test_inds,]
+y_train <- y[train_inds]
+y_test <- y[test_inds]
+
+# Fit the GP
+model_gp <- bgp(X=X_train, Z=y_train, XX=X_test)
+plot(model_gp$ZZ.mean, y_test, xlab = "predicted", ylab = "actual", main = "Gaussian process")
+abline(0,1,lwd=2.5,lty=3,col="red")
+

+

Assess the RMSE

+
+sqrt(mean((model_gp$ZZ.mean - y_test)^2))
+#> [1] 5.023593
+
+
+

BART-based Gaussian process +

+
+# Run BART on the data
+num_trees <- 200
+sigma_leaf <- 1/num_trees
+bart_model <- bart(X_train=X_train, y_train=y_train, X_test=X_test, num_trees=num_trees)
+
+# Extract kernels needed for kriging
+result_kernels <- computeForestKernels(bart_model=bart_model, X_train=X_train, X_test=X_test)
+Sigma_11 <- result_kernels$kernel_test
+Sigma_12 <- result_kernels$kernel_test_train
+Sigma_22 <- result_kernels$kernel_train
+Sigma_22_inv <- ginv(Sigma_22)
+Sigma_21 <- t(Sigma_12)
+
+# Compute mean and covariance for the test set posterior
+mu_tilde <- Sigma_12 %*% Sigma_22_inv %*% y_train
+Sigma_tilde <- (sigma_leaf)*(Sigma_11 - Sigma_12 %*% Sigma_22_inv %*% Sigma_21)
+
+# Sample from f(X_test) | X_test, X_train, f(X_train)
+gp_samples <- mvtnorm::rmvnorm(1000, mean = mu_tilde, sigma = Sigma_tilde)
+
+# Compute posterior mean predictions for f(X_test)
+yhat_mean_test <- colMeans(gp_samples)
+plot(yhat_mean_test, y_test, xlab = "predicted", ylab = "actual", main = "BART Gaussian process")
+abline(0,1,lwd=2.5,lty=3,col="red")
+

+

Assess the RMSE

+
+sqrt(mean((yhat_mean_test - y_test)^2))
+#> [1] 5.198576
+

While the use case of a BART kernel for classical kriging is perhaps +unclear without more empirical investigation, we will see in a later +vignette that the kernel approach can be very beneficial for causal +inference applications.

+
+
+
+

References +

+
+
+Gramacy, Robert B., and Matthew Taddy. 2010. “Categorical Inputs, +Sensitivity Analysis, Optimization and Importance Tempering with tgp Version 2, an R Package for Treed +Gaussian Process Models.” Journal of Statistical +Software 33 (6): 1–48. https://doi.org/10.18637/jss.v033.i06. +
+
+
+
+
+ + + +
+ + + +
+
+ + + + + + + diff --git a/articles/Ensemble-Kernel_files/figure-html/unnamed-chunk-2-1.png b/articles/Ensemble-Kernel_files/figure-html/unnamed-chunk-2-1.png new file mode 100644 index 0000000..1803231 Binary files /dev/null and b/articles/Ensemble-Kernel_files/figure-html/unnamed-chunk-2-1.png differ diff --git a/articles/Ensemble-Kernel_files/figure-html/unnamed-chunk-4-1.png b/articles/Ensemble-Kernel_files/figure-html/unnamed-chunk-4-1.png new file mode 100644 index 0000000..0d750d5 Binary files /dev/null and b/articles/Ensemble-Kernel_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/articles/Ensemble-Kernel_files/figure-html/unnamed-chunk-6-1.png b/articles/Ensemble-Kernel_files/figure-html/unnamed-chunk-6-1.png new file mode 100644 index 0000000..a855b82 Binary files /dev/null and b/articles/Ensemble-Kernel_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/articles/Ensemble-Kernel_files/figure-html/unnamed-chunk-8-1.png b/articles/Ensemble-Kernel_files/figure-html/unnamed-chunk-8-1.png new file mode 100644 index 0000000..83b36a2 Binary files /dev/null and b/articles/Ensemble-Kernel_files/figure-html/unnamed-chunk-8-1.png differ diff --git a/articles/Prototype-Interface.html b/articles/Prototype-Interface.html index e1d951c..ae920f5 100644 --- a/articles/Prototype-Interface.html +++ b/articles/Prototype-Interface.html @@ -43,6 +43,7 @@ @@ -1062,7 +1063,7 @@

Results

 mean((rowMeans(tau_hat[,1:num_gfr]) - tau_x)^2)
-#> [1] 0.2857801
+#> [1] 0.3165563

Inspect the warm start BART results

 plot(sigma_samples[(num_gfr+1):num_samples], ylab="sigma^2")
@@ -1079,7 +1080,7 @@

Results

 mean((rowMeans(tau_hat[,(num_gfr+1):num_samples]) - tau_x)^2)
-#> [1] 0.3717923
+#> [1] 0.5352369

Inspect the “adaptive coding” parameters \(b_0\) and \(b_1\).

 plot(b_0_samples, col = "blue", ylab = "Coding parameter draws", 
diff --git a/articles/Prototype-Interface_files/figure-html/adaptive_coding-1.png b/articles/Prototype-Interface_files/figure-html/adaptive_coding-1.png
index e26fdfc..6307644 100644
Binary files a/articles/Prototype-Interface_files/figure-html/adaptive_coding-1.png and b/articles/Prototype-Interface_files/figure-html/adaptive_coding-1.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/bcf_warm_start_plot-1.png b/articles/Prototype-Interface_files/figure-html/bcf_warm_start_plot-1.png
index f438f20..4144a22 100644
Binary files a/articles/Prototype-Interface_files/figure-html/bcf_warm_start_plot-1.png and b/articles/Prototype-Interface_files/figure-html/bcf_warm_start_plot-1.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/bcf_warm_start_plot-2.png b/articles/Prototype-Interface_files/figure-html/bcf_warm_start_plot-2.png
index 40d38a6..8d9cfcf 100644
Binary files a/articles/Prototype-Interface_files/figure-html/bcf_warm_start_plot-2.png and b/articles/Prototype-Interface_files/figure-html/bcf_warm_start_plot-2.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/bcf_warm_start_plot-3.png b/articles/Prototype-Interface_files/figure-html/bcf_warm_start_plot-3.png
index 83127c0..999a253 100644
Binary files a/articles/Prototype-Interface_files/figure-html/bcf_warm_start_plot-3.png and b/articles/Prototype-Interface_files/figure-html/bcf_warm_start_plot-3.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/bcf_xbcf_plot-1.png b/articles/Prototype-Interface_files/figure-html/bcf_xbcf_plot-1.png
index 9906991..a13c8d9 100644
Binary files a/articles/Prototype-Interface_files/figure-html/bcf_xbcf_plot-1.png and b/articles/Prototype-Interface_files/figure-html/bcf_xbcf_plot-1.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/bcf_xbcf_plot-2.png b/articles/Prototype-Interface_files/figure-html/bcf_xbcf_plot-2.png
index f2c6cc8..3e7e526 100644
Binary files a/articles/Prototype-Interface_files/figure-html/bcf_xbcf_plot-2.png and b/articles/Prototype-Interface_files/figure-html/bcf_xbcf_plot-2.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/bcf_xbcf_plot-3.png b/articles/Prototype-Interface_files/figure-html/bcf_xbcf_plot-3.png
index 722832c..b4eca57 100644
Binary files a/articles/Prototype-Interface_files/figure-html/bcf_xbcf_plot-3.png and b/articles/Prototype-Interface_files/figure-html/bcf_xbcf_plot-3.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-11-1.png b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-11-1.png
index 658b687..1b76e56 100644
Binary files a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-11-1.png and b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-11-1.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-11-2.png b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-11-2.png
index b342be4..cef6e74 100644
Binary files a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-11-2.png and b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-11-2.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-12-1.png b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-12-1.png
index 3447ff5..e561f20 100644
Binary files a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-12-1.png and b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-12-1.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-12-2.png b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-12-2.png
index 492ef46..bd8d625 100644
Binary files a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-12-2.png and b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-12-2.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-13-1.png b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-13-1.png
index fb9ac41..f8b5e11 100644
Binary files a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-13-1.png and b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-13-1.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-22-1.png b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-22-1.png
index b54bf06..46db592 100644
Binary files a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-22-1.png and b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-22-1.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-22-2.png b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-22-2.png
index cbad6be..3e05487 100644
Binary files a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-22-2.png and b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-22-2.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-23-1.png b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-23-1.png
index 50c626d..2c6f9a0 100644
Binary files a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-23-1.png and b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-23-1.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-23-2.png b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-23-2.png
index cd86330..8cca1b2 100644
Binary files a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-23-2.png and b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-23-2.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-24-1.png b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-24-1.png
index b08d978..2a20d92 100644
Binary files a/articles/Prototype-Interface_files/figure-html/unnamed-chunk-24-1.png and b/articles/Prototype-Interface_files/figure-html/unnamed-chunk-24-1.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/warm_start_plot-1.png b/articles/Prototype-Interface_files/figure-html/warm_start_plot-1.png
index 6c93681..5b28b7c 100644
Binary files a/articles/Prototype-Interface_files/figure-html/warm_start_plot-1.png and b/articles/Prototype-Interface_files/figure-html/warm_start_plot-1.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/warm_start_plot-2.png b/articles/Prototype-Interface_files/figure-html/warm_start_plot-2.png
index dba2d55..164c033 100644
Binary files a/articles/Prototype-Interface_files/figure-html/warm_start_plot-2.png and b/articles/Prototype-Interface_files/figure-html/warm_start_plot-2.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/xbart_plot-1.png b/articles/Prototype-Interface_files/figure-html/xbart_plot-1.png
index b1af1a6..8b43916 100644
Binary files a/articles/Prototype-Interface_files/figure-html/xbart_plot-1.png and b/articles/Prototype-Interface_files/figure-html/xbart_plot-1.png differ
diff --git a/articles/Prototype-Interface_files/figure-html/xbart_plot-2.png b/articles/Prototype-Interface_files/figure-html/xbart_plot-2.png
index fffb79e..23ed1bb 100644
Binary files a/articles/Prototype-Interface_files/figure-html/xbart_plot-2.png and b/articles/Prototype-Interface_files/figure-html/xbart_plot-2.png differ
diff --git a/articles/index.html b/articles/index.html
index a85485d..4bc737a 100644
--- a/articles/index.html
+++ b/articles/index.html
@@ -26,6 +26,7 @@
   
 
@@ -50,6 +51,8 @@ 

All vignettes

Causal Machine Learning in StochTree
+
Kernel Methods from Tree Ensembles in StochTree
+
Prototype Interface in StochTree
diff --git a/authors.html b/authors.html index dc399fe..54ec097 100644 --- a/authors.html +++ b/authors.html @@ -26,6 +26,7 @@ diff --git a/index.html b/index.html index 26aa479..3bb7330 100644 --- a/index.html +++ b/index.html @@ -43,6 +43,7 @@ diff --git a/pkgdown.yml b/pkgdown.yml index d9db9c9..92c1b4d 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -4,8 +4,9 @@ pkgdown_sha: ~ articles: Bayesian-Supervised-Learning: Bayesian-Supervised-Learning.html Causal-Inference: Causal-Inference.html + Ensemble-Kernel: Ensemble-Kernel.html Prototype-Interface: Prototype-Interface.html -last_built: 2024-05-07T03:07Z +last_built: 2024-05-07T04:12Z urls: reference: https://stochastictree.github.io/stochtree-r/reference article: https://stochastictree.github.io/stochtree-r/articles diff --git a/reference/BART.html b/reference/BART.html index b0bf979..ac92d2e 100644 --- a/reference/BART.html +++ b/reference/BART.html @@ -26,6 +26,7 @@ diff --git a/reference/CppRNG.html b/reference/CppRNG.html index 78cbcb9..6310388 100644 --- a/reference/CppRNG.html +++ b/reference/CppRNG.html @@ -30,6 +30,7 @@ diff --git a/reference/ForestDataset.html b/reference/ForestDataset.html index dfac270..c14d697 100644 --- a/reference/ForestDataset.html +++ b/reference/ForestDataset.html @@ -30,6 +30,7 @@ diff --git a/reference/ForestKernel.html b/reference/ForestKernel.html new file mode 100644 index 0000000..fce9194 --- /dev/null +++ b/reference/ForestKernel.html @@ -0,0 +1,191 @@ + +Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. — ForestKernel • stochtree + Skip to contents + + +
+
+
+ +
+

Computes leaf membership internally as a sparse matrix and also calculates a +(dense) kernel based on the sparse matrix all in C++.

+
+ + +
+

Public fields

+

forest_kernel_ptr
+

External pointer to a C++ StochTree::ForestKernel class

+ + +

+
+
+

Methods

+ +


+

Method new()

+

Create a new ForestKernel object.

+

Usage

+

+
+ +
+

Returns

+

A new ForestKernel object.

+
+ +


+

Method compute_leaf_indices()

+

Compute the leaf indices of each tree in the ensemble for every observation in a dataset. +Stores the result internally, which can be extracted from the class via a call to get_leaf_indices.

+

Usage

+

ForestKernel$compute_leaf_indices(
+  covariates_train,
+  covariates_test = NULL,
+  forest_container,
+  forest_num
+)

+
+ +
+

Arguments

+

covariates_train
+

Matrix of training set covariates at which to compute leaf indices

+ + +
covariates_test
+

(Optional) Matrix of test set covariates at which to compute leaf indices

+ + +
forest_container
+

Object of type ForestSamples

+ + +
forest_num
+

Index of the forest in forest_container to be assessed

+ + +

+
+
+

Returns

+

List of vectors. If covariates_test = NULL the list has one element (train set leaf indices), and +otherwise the list has two elements (train and test set leaf indices).

+
+ +


+

Method compute_kernel()

+

Compute the kernel implied by a tree ensemble. This function calls compute_leaf_indices, +so it is not necessary to call both. compute_leaf_indices is exposed at the class level +to allow for extracting the vector of leaf indices for an ensemble directly in R.

+

Usage

+

ForestKernel$compute_kernel(
+  covariates_train,
+  covariates_test = NULL,
+  forest_container,
+  forest_num
+)

+
+ +
+

Arguments

+

covariates_train
+

Matrix of training set covariates at which to assess ensemble kernel

+ + +
covariates_test
+

(Optional) Matrix of test set covariates at which to assess ensemble kernel

+ + +
forest_container
+

Object of type ForestSamples

+ + +
forest_num
+

Index of the forest in forest_container to be assessed

+ + +

+
+
+

Returns

+

List of matrices. If covariates_test = NULL, the list contains +one n_train x n_train matrix, where n_train = nrow(covariates_train). +This matrix is the kernel defined by W_train %*% t(W_train) where W_train +is a matrix with n_train rows and as many columns as there are total leaves in an ensemble. +If covariates_test is not NULL, the list contains two more matrices defined by +W_test %*% t(W_train) and W_test %*% t(W_test).

+
+ +
+ +
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/reference/ForestModel.html b/reference/ForestModel.html index 88e3650..3245aaa 100644 --- a/reference/ForestModel.html +++ b/reference/ForestModel.html @@ -30,6 +30,7 @@ diff --git a/reference/ForestSamples.html b/reference/ForestSamples.html index 9ea23b8..15b0842 100644 --- a/reference/ForestSamples.html +++ b/reference/ForestSamples.html @@ -26,6 +26,7 @@ @@ -72,6 +73,7 @@

Public methods

ForestSamples$save_json()

  • ForestSamples$load_json()

  • ForestSamples$num_samples()

  • +
  • ForestSamples$num_trees()

  • ForestSamples$output_dimension()


  • Method new()

    @@ -283,15 +285,27 @@

    Returns
    +

    Method num_trees()

    +

    Return number of trees in each ensemble of a ForestContainer object

    +

    Usage

    +

    ForestSamples$num_trees()

    +
    + +
    +

    Returns

    +

    Tree count

    +
    +


    Method output_dimension()

    Return output dimension of trees in a ForestContainer object

    -

    Usage

    +

    Usage

    ForestSamples$output_dimension()

    -

    Returns

    +

    Returns

    Leaf node parameter size

    diff --git a/reference/Outcome.html b/reference/Outcome.html index 6268fe3..b145b2d 100644 --- a/reference/Outcome.html +++ b/reference/Outcome.html @@ -36,6 +36,7 @@ diff --git a/reference/RandomEffectSamples.html b/reference/RandomEffectSamples.html index 101b439..52fcf91 100644 --- a/reference/RandomEffectSamples.html +++ b/reference/RandomEffectSamples.html @@ -28,6 +28,7 @@ diff --git a/reference/RandomEffectsDataset.html b/reference/RandomEffectsDataset.html index 2c1f1cf..0f55b93 100644 --- a/reference/RandomEffectsDataset.html +++ b/reference/RandomEffectsDataset.html @@ -28,6 +28,7 @@ diff --git a/reference/RandomEffectsModel.html b/reference/RandomEffectsModel.html index 9d15705..54b0c0e 100644 --- a/reference/RandomEffectsModel.html +++ b/reference/RandomEffectsModel.html @@ -28,6 +28,7 @@ diff --git a/reference/RandomEffectsTracker.html b/reference/RandomEffectsTracker.html index 7b4d3c0..81a829d 100644 --- a/reference/RandomEffectsTracker.html +++ b/reference/RandomEffectsTracker.html @@ -30,6 +30,7 @@ diff --git a/reference/bcf.html b/reference/bcf.html index 9df21f4..89ac455 100644 --- a/reference/bcf.html +++ b/reference/bcf.html @@ -26,6 +26,7 @@ diff --git a/reference/computeForestKernels.html b/reference/computeForestKernels.html new file mode 100644 index 0000000..7b269e4 --- /dev/null +++ b/reference/computeForestKernels.html @@ -0,0 +1,119 @@ + +Compute a kernel from a tree ensemble, defined by the fraction of trees of an ensemble in which two observations fall into the same leaf. — computeForestKernels • stochtree + Skip to contents + + +
    +
    +
    + +
    +

    Compute a kernel from a tree ensemble, defined by the fraction +of trees of an ensemble in which two observations fall into the +same leaf.

    +
    + +
    +

    Usage

    +
    computeForestKernels(bart_model, X_train, X_test = NULL, forest_num = NULL)
    +
    + +
    +

    Arguments

    +
    bart_model
    +

    Object of type bartmodel corresponding to a BART model with at least one sample

    + + +
    X_train
    +

    Matrix of "training" data. In a traditional Gaussian process kriging context, this +corresponds to the observations for which outcomes are observed.

    + + +
    X_test
    +

    (Optional) Matrix of "test" data. In a traditional Gaussian process kriging context, this +corresponds to the observations for which outcomes are unobserved and must be estimated +based on the kernels k(X_test,X_test), k(X_test,X_train), and k(X_train,X_train). If not provided, +this function will only compute k(X_train, X_train).

    + + +
    forest_num
    +

    (Optional) Index of the forest sample to use for kernel computation. If not provided, +this function will use the last forest.

    + +
    +
    +

    Value

    + + +

    List of kernel matrices. If X_test = NULL, the list contains +one n_train x n_train matrix, where n_train = nrow(X_train). +This matrix is the kernel defined by W_train %*% t(W_train) where W_train

    + + +

    is a matrix with n_train rows and as many columns as there are total leaves in an ensemble. +If X_test is not NULL, the list contains two more matrices defined by +W_test %*% t(W_train) and W_test %*% t(W_test).

    +
    + +
    + + +
    + + + +
    + + + + + + + diff --git a/reference/computeForestLeafIndices.html b/reference/computeForestLeafIndices.html new file mode 100644 index 0000000..aca4a65 --- /dev/null +++ b/reference/computeForestLeafIndices.html @@ -0,0 +1,150 @@ + +Compute and return a vector representation of a forest's leaf predictions for every observation in a dataset. The vector has a "column-major" format that can be easily re-represented as as a CSC sparse matrix: elements are organized so that the first n elements correspond to leaf predictions for all n observations in a dataset for the first tree in an ensemble, the next n elements correspond to predictions for the second tree and so on. The "data" for each element corresponds to a uniquely mapped column index that corresponds to a single leaf of a single tree (i.e. if tree 1 has 3 leaves, its column indices range from 0 to 2, and then tree 2's leaf indices begin at 3, etc...). Users may pass a single dataset (which we refer to here as a "training set") or two datasets (which we refer to as "training and test sets"). This verbiage hints that one potential use-case for a matrix of leaf indices is to define a ensemble-based kernel for kriging. — computeForestLeafIndices • stochtree + Skip to contents + + +
    +
    +
    + +
    +

    Compute and return a vector representation of a forest's leaf predictions for +every observation in a dataset. +The vector has a "column-major" format that can be easily re-represented +as a CSC sparse matrix: elements are organized so that the first n elements +correspond to leaf predictions for all n observations in a dataset for the +first tree in an ensemble, the next n elements correspond to predictions for +the second tree and so on. The "data" for each element corresponds to a uniquely +mapped column index that corresponds to a single leaf of a single tree (i.e. +if tree 1 has 3 leaves, its column indices range from 0 to 2, and then tree 2's +leaf indices begin at 3, etc...). +Users may pass a single dataset (which we refer to here as a "training set") +or two datasets (which we refer to as "training and test sets"). This verbiage +hints that one potential use-case for a matrix of leaf indices is to define an +ensemble-based kernel for kriging.

    +
    + +
    +

    Usage

    +
    computeForestLeafIndices(bart_model, X_train, X_test = NULL, forest_num = NULL)
    +
    + +
    +

    Arguments

    +
    bart_model
    +

    Object of type bartmodel corresponding to a BART model with at least one sample

    + + +
    X_train
    +

    Matrix of "training" data. In a traditional Gaussian process kriging context, this +corresponds to the observations for which outcomes are observed.

    + + +
    X_test
    +

    (Optional) Matrix of "test" data. In a traditional Gaussian process kriging context, this +corresponds to the observations for which outcomes are unobserved and must be estimated +based on the kernels k(X_test,X_test), k(X_test,X_train), and k(X_train,X_train). If not provided, +this function will only compute k(X_train, X_train).

    + + +
    forest_num
    +

    (Optional) Index of the forest sample to use for kernel computation. If not provided, +this function will use the last forest.

    + +
    +
    +

    Value

    + + +

    List of vectors. If X_test = NULL, the list contains +one vector of length n_train * num_trees, where n_train = nrow(X_train)

    + + +

    and num_trees is the number of trees in bart_model. If X_test is not NULL, +the list contains another vector of length n_test * num_trees.

    +
    + +
    + + +
    + + + +
    + + + + + + + diff --git a/reference/createForestContainer.html b/reference/createForestContainer.html index 776819a..cd7a04e 100644 --- a/reference/createForestContainer.html +++ b/reference/createForestContainer.html @@ -26,6 +26,7 @@ diff --git a/reference/createForestDataset.html b/reference/createForestDataset.html index 76663b6..7e1fe51 100644 --- a/reference/createForestDataset.html +++ b/reference/createForestDataset.html @@ -26,6 +26,7 @@ diff --git a/reference/createForestKernel.html b/reference/createForestKernel.html new file mode 100644 index 0000000..51e424a --- /dev/null +++ b/reference/createForestKernel.html @@ -0,0 +1,83 @@ + +Create a ForestKernel object — createForestKernel • stochtree + Skip to contents + + +
    +
    +
    + +
    +

    Create a ForestKernel object

    +
    + +
    +

    Usage

    +
    createForestKernel()
    +
    + +
    +

    Value

    + + +

    ForestKernel object

    +
    + +
    + + +
    + + + +
    + + + + + + + diff --git a/reference/createForestModel.html b/reference/createForestModel.html index 594b771..ecbc302 100644 --- a/reference/createForestModel.html +++ b/reference/createForestModel.html @@ -26,6 +26,7 @@ diff --git a/reference/createOutcome.html b/reference/createOutcome.html index b6a4256..f6c6fae 100644 --- a/reference/createOutcome.html +++ b/reference/createOutcome.html @@ -26,6 +26,7 @@ diff --git a/reference/createRNG.html b/reference/createRNG.html index 70fb370..93b831c 100644 --- a/reference/createRNG.html +++ b/reference/createRNG.html @@ -26,6 +26,7 @@ diff --git a/reference/createRandomEffectSamples.html b/reference/createRandomEffectSamples.html index 33a7a21..f3e98a5 100644 --- a/reference/createRandomEffectSamples.html +++ b/reference/createRandomEffectSamples.html @@ -26,6 +26,7 @@ diff --git a/reference/createRandomEffectsDataset.html b/reference/createRandomEffectsDataset.html index 102f15a..efcbb30 100644 --- a/reference/createRandomEffectsDataset.html +++ b/reference/createRandomEffectsDataset.html @@ -26,6 +26,7 @@ diff --git a/reference/createRandomEffectsModel.html b/reference/createRandomEffectsModel.html index 0d479e3..2eb22e0 100644 --- a/reference/createRandomEffectsModel.html +++ b/reference/createRandomEffectsModel.html @@ -26,6 +26,7 @@ diff --git a/reference/createRandomEffectsTracker.html b/reference/createRandomEffectsTracker.html index 49cd356..f48e9d6 100644 --- a/reference/createRandomEffectsTracker.html +++ b/reference/createRandomEffectsTracker.html @@ -26,6 +26,7 @@ diff --git a/reference/getRandomEffectSamples.bartmodel.html b/reference/getRandomEffectSamples.bartmodel.html index 5d4e328..cbc5385 100644 --- a/reference/getRandomEffectSamples.bartmodel.html +++ b/reference/getRandomEffectSamples.bartmodel.html @@ -26,6 +26,7 @@ diff --git a/reference/getRandomEffectSamples.bcf.html b/reference/getRandomEffectSamples.bcf.html index 5b69c69..5dfc9ac 100644 --- a/reference/getRandomEffectSamples.bcf.html +++ 
b/reference/getRandomEffectSamples.bcf.html @@ -26,6 +26,7 @@ diff --git a/reference/getRandomEffectSamples.html b/reference/getRandomEffectSamples.html index 428cfe9..3724f68 100644 --- a/reference/getRandomEffectSamples.html +++ b/reference/getRandomEffectSamples.html @@ -26,6 +26,7 @@ diff --git a/reference/index.html b/reference/index.html index 2eed7b4..87382fc 100644 --- a/reference/index.html +++ b/reference/index.html @@ -26,6 +26,7 @@ @@ -70,6 +71,11 @@

    All functionsForestKernel + +
    Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble.
    +
    + ForestModel
    Class that defines and samples a forest model
    @@ -110,6 +116,16 @@

    All functionscomputeForestKernels() + +
    Compute a kernel from a tree ensemble, defined by the fraction of trees of an ensemble in which two observations fall into the same leaf.
    +

    + + computeForestLeafIndices() +
    +
    Compute and return a vector representation of a forest's leaf predictions for every observation in a dataset. The vector has a "column-major" format that can be easily re-represented as a CSC sparse matrix: elements are organized so that the first n elements correspond to leaf predictions for all n observations in a dataset for the first tree in an ensemble, the next n elements correspond to predictions for the second tree and so on. The "data" for each element corresponds to a uniquely mapped column index that corresponds to a single leaf of a single tree (i.e. if tree 1 has 3 leaves, its column indices range from 0 to 2, and then tree 2's leaf indices begin at 3, etc...). Users may pass a single dataset (which we refer to here as a "training set") or two datasets (which we refer to as "training and test sets"). This verbiage hints that one potential use-case for a matrix of leaf indices is to define an ensemble-based kernel for kriging.
    +
    + createForestContainer()
    Create a container of forest samples
    @@ -120,6 +136,11 @@

    All functionscreateForestKernel() + +
    Create a ForestKernel object
    +

    + createForestModel()
    Create a forest model object
    diff --git a/reference/predict.bartmodel.html b/reference/predict.bartmodel.html index 95136a5..566d5e2 100644 --- a/reference/predict.bartmodel.html +++ b/reference/predict.bartmodel.html @@ -26,6 +26,7 @@ diff --git a/reference/predict.bcf.html b/reference/predict.bcf.html index 3fcbc5e..a56ad16 100644 --- a/reference/predict.bcf.html +++ b/reference/predict.bcf.html @@ -26,6 +26,7 @@ diff --git a/reference/sample_sigma2_one_iteration.html b/reference/sample_sigma2_one_iteration.html index cc7693b..eb8d1e2 100644 --- a/reference/sample_sigma2_one_iteration.html +++ b/reference/sample_sigma2_one_iteration.html @@ -26,6 +26,7 @@ diff --git a/reference/sample_tau_one_iteration.html b/reference/sample_tau_one_iteration.html index 0e3cab0..4e008e5 100644 --- a/reference/sample_tau_one_iteration.html +++ b/reference/sample_tau_one_iteration.html @@ -26,6 +26,7 @@ diff --git a/reference/stochtree-package.html b/reference/stochtree-package.html index f341fc3..6929bc3 100644 --- a/reference/stochtree-package.html +++ b/reference/stochtree-package.html @@ -26,6 +26,7 @@ diff --git a/search.json b/search.json index 68ba628..b6a1b90 100644 --- a/search.json +++ b/search.json @@ -1 +1 @@ -[{"path":"https://stochastictree.github.io/stochtree-r/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2024 stochtree authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. 
EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"simulation","dir":"Articles","previous_headings":"Demo 1: Step Function","what":"Simulation","title":"Bayesian Supervised Learning in StochTree","text":", generate data simple step function.","code":"# Generate the data n <- 500 p_x <- 10 snr <- 3 X <- matrix(runif(n*p_x), ncol = p_x) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) ) noise_sd <- sd(f_XW) / snr y <- f_XW + rnorm(n, 0, 1)*noise_sd # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] W_test <- NULL W_train <- NULL y_test <- y[test_inds] y_train <- y[train_inds]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"warmstart","dir":"Articles","previous_headings":"Demo 1: Step Function > Sampling and Analysis","what":"Warmstart","title":"Bayesian Supervised Learning in StochTree","text":"first simulate ensemble model \\(y \\mid X\\) using “warm-start” initialization samples (Hahn (2023)). default stochtree. 
Inspect initial XBART “warm-start” samples Inspect BART samples initialized XBART warm-start","code":"num_gfr <- 10 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bart_model_warmstart <- stochtree::bart( X_train = X_train, y_train = y_train, X_test = X_test, leaf_model = 0, num_trees = 100, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma = T, sample_tau = T ) plot(bart_model_warmstart$sigma2_samples[1:num_gfr], ylab=\"sigma^2\") plot(rowMeans(bart_model_warmstart$yhat_test[,1:num_gfr]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(bart_model_warmstart$sigma2_samples[(num_gfr + 1):num_samples], ylab=\"sigma^2\") plot(rowMeans(bart_model_warmstart$yhat_test[,(num_gfr + 1):num_samples]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"bart-mcmc-without-warmstart","dir":"Articles","previous_headings":"Demo 1: Step Function > Sampling and Analysis","what":"BART MCMC without Warmstart","title":"Bayesian Supervised Learning in StochTree","text":"Next, simulate ensemble model without warm-start initialization. 
Inspect BART samples burnin.","code":"num_gfr <- 0 num_burnin <- 100 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bart_model_root <- stochtree::bart( X_train = X_train, y_train = y_train, X_test = X_test, leaf_model = 0, num_trees = 100, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma = T, sample_tau = T ) plot(bart_model_root$sigma2_samples[(num_burnin + 1):num_samples], ylab=\"sigma^2\") plot(rowMeans(bart_model_root$yhat_test[,(num_burnin + 1):num_samples]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"simulation-1","dir":"Articles","previous_headings":"Demo 2: Partitioned Linear Model","what":"Simulation","title":"Bayesian Supervised Learning in StochTree","text":", generate data simple partitioned linear model.","code":"# Generate the data n <- 500 p_x <- 10 p_w <- 1 snr <- 3 X <- matrix(runif(n*p_x), ncol = p_x) W <- matrix(runif(n*p_w), ncol = p_w) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*W[,1]) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*W[,1]) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*W[,1]) + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*W[,1]) ) noise_sd <- sd(f_XW) / snr y <- f_XW + rnorm(n, 0, 1)*noise_sd # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] W_test <- W[test_inds,] W_train <- W[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"warmstart-1","dir":"Articles","previous_headings":"Demo 2: Partitioned Linear Model > Sampling and Analysis","what":"Warmstart","title":"Bayesian Supervised 
Learning in StochTree","text":"first simulate ensemble model \\(y \\mid X\\) using “warm-start” initialization samples (Hahn (2023)). default stochtree. Inspect initial XBART “warm-start” samples Inspect BART samples initialized XBART warm-start","code":"num_gfr <- 10 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bart_model_warmstart <- stochtree::bart( X_train = X_train, W_train = W_train, y_train = y_train, X_test = X_test, W_test = W_test, leaf_model = 1, num_trees = 100, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma = T, sample_tau = T ) plot(bart_model_warmstart$sigma2_samples[1:num_gfr], ylab=\"sigma^2\") plot(rowMeans(bart_model_warmstart$yhat_test[,1:num_gfr]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(bart_model_warmstart$sigma2_samples[(num_gfr + 1):num_samples], ylab=\"sigma^2\") plot(rowMeans(bart_model_warmstart$yhat_test[,(num_gfr + 1):num_samples]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"bart-mcmc-without-warmstart-1","dir":"Articles","previous_headings":"Demo 2: Partitioned Linear Model > Sampling and Analysis","what":"BART MCMC without Warmstart","title":"Bayesian Supervised Learning in StochTree","text":"Next, simulate ensemble model without warm-start initialization. 
Inspect BART samples burnin.","code":"num_gfr <- 0 num_burnin <- 100 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bart_model_root <- stochtree::bart( X_train = X_train, W_train = W_train, y_train = y_train, X_test = X_test, W_test = W_test, leaf_model = 1, num_trees = 100, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma = T, sample_tau = T ) plot(bart_model_root$sigma2_samples[(num_burnin + 1):num_samples], ylab=\"sigma^2\") plot(rowMeans(bart_model_root$yhat_test[,(num_burnin + 1):num_samples]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"simulation-2","dir":"Articles","previous_headings":"Demo 3: Partitioned Linear Model with Random Effects","what":"Simulation","title":"Bayesian Supervised Learning in StochTree","text":", generate data simple partitioned linear model additive random effect structure.","code":"# Generate the data n <- 500 p_x <- 10 p_w <- 1 snr <- 3 X <- matrix(runif(n*p_x), ncol = p_x) W <- matrix(runif(n*p_w), ncol = p_w) group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-5, -3, 5, 3),nrow=2,byrow=T) rfx_basis <- cbind(1, runif(n, -1, 1)) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*W[,1]) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*W[,1]) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*W[,1]) + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*W[,1]) ) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) noise_sd <- sd(f_XW) / snr y <- f_XW + rfx_term + rnorm(n, 0, 1)*noise_sd # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] W_test <- W[test_inds,] W_train <- W[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] 
group_ids_test <- group_ids[test_inds] group_ids_train <- group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"warmstart-2","dir":"Articles","previous_headings":"Demo 3: Partitioned Linear Model with Random Effects > Sampling and Analysis","what":"Warmstart","title":"Bayesian Supervised Learning in StochTree","text":"first simulate ensemble model \\(y \\mid X\\) using “warm-start” initialization samples (Hahn (2023)). default stochtree. Inspect initial XBART “warm-start” samples Inspect BART samples initialized XBART warm-start","code":"num_gfr <- 10 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bart_model_warmstart <- stochtree::bart( X_train = X_train, W_train = W_train, y_train = y_train, group_ids_train = group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, W_test = W_test, group_ids_test = group_ids_test, rfx_basis_test = rfx_basis_test, leaf_model = 1, num_trees = 100, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma = T, sample_tau = T ) plot(bart_model_warmstart$sigma2_samples[1:num_gfr], ylab=\"sigma^2\") abline(h=noise_sd^2,col=\"red\",lty=2,lwd=2.5) plot(rowMeans(bart_model_warmstart$yhat_test[,1:num_gfr]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(bart_model_warmstart$sigma2_samples[(num_gfr + 1):num_samples], ylab=\"sigma^2\") abline(h=noise_sd^2,col=\"red\",lty=2,lwd=2.5) plot(rowMeans(bart_model_warmstart$yhat_test[,(num_gfr + 1):num_samples]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"bart-mcmc-without-warmstart-2","dir":"Articles","previous_headings":"Demo 3: Partitioned Linear Model 
with Random Effects > Sampling and Analysis","what":"BART MCMC without Warmstart","title":"Bayesian Supervised Learning in StochTree","text":"Next, simulate ensemble model without warm-start initialization. Inspect BART samples burnin.","code":"num_gfr <- 0 num_burnin <- 100 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bart_model_root <- stochtree::bart( X_train = X_train, W_train = W_train, y_train = y_train, group_ids_train = group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, W_test = W_test, group_ids_test = group_ids_test, rfx_basis_test = rfx_basis_test, leaf_model = 1, num_trees = 100, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma = T, sample_tau = T ) plot(bart_model_root$sigma2_samples[(num_burnin + 1):num_samples], ylab=\"sigma^2\") abline(h=noise_sd^2,col=\"red\",lty=2,lwd=2.5) plot(rowMeans(bart_model_root$yhat_test[,(num_burnin + 1):num_samples]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"demo-1-nonlinear-outcome-model-heterogeneous-treatment-effect","dir":"Articles","previous_headings":"","what":"Demo 1: Nonlinear Outcome Model, Heterogeneous Treatment Effect","title":"Causal Machine Learning in StochTree","text":"consider following data generating process Hahn, Murray, Carvalho (2020): \\[\\begin{equation*} \\begin{aligned} y &= \\mu(X) + \\tau(X) Z + \\epsilon\\\\ \\epsilon &\\sim N\\left(0,\\sigma^2\\right)\\\\ \\mu(X) &= 1 + g(X) + 6 \\lvert X_3 - 1 \\rvert\\\\ \\tau(X) &= 1 + 2 X_2 X_4\\\\ g(X) &= \\mathbb{}(X_5=1) \\times 2 - \\mathbb{}(X_5=2) \\times 1 - \\mathbb{}(X_5=3) \\times 4\\\\ X_1,X_2,X_3 &\\sim N\\left(0,1\\right)\\\\ X_4 &\\sim \\text{Bernoulli}(1/2)\\\\ X_5 &\\sim \\text{Categorical}(1/3,1/3,1/3)\\\\ \\end{aligned} 
\\end{equation*}\\]","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"simulation","dir":"Articles","previous_headings":"Demo 1: Nonlinear Outcome Model, Heterogeneous Treatment Effect","what":"Simulation","title":"Causal Machine Learning in StochTree","text":"draw DGP defined ","code":"n <- 500 snr <- 3 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) mu_x <- mu1(X) tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x y <- E_XZ + rnorm(n, 0, 1)*(sd(E_XZ)/snr) # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"warmstart","dir":"Articles","previous_headings":"Demo 1: Nonlinear Outcome Model, Heterogeneous Treatment Effect > Sampling and Analysis","what":"Warmstart","title":"Causal Machine Learning in StochTree","text":"first simulate ensemble model \\(y \\mid X\\) using “warm-start” initialization samples (Krantsevich, , Hahn (2023)). default stochtree. 
Inspect BART samples initialized XBART warm-start Examine test set interval coverage","code":"num_gfr <- 10 num_burnin <- 0 num_mcmc <- 1000 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_warmstart <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_gfr+1):num_samples plot(rowMeans(bcf_model_warmstart$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_warmstart$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_warmstart$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 0.97"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"bart-mcmc-without-warmstart","dir":"Articles","previous_headings":"Demo 1: Nonlinear Outcome Model, Heterogeneous Treatment Effect > Sampling and Analysis","what":"BART MCMC without Warmstart","title":"Causal Machine Learning in StochTree","text":"Next, simulate ensemble model without warm-start initialization. 
Inspect BART samples burnin Examine test set interval coverage","code":"num_gfr <- 0 num_burnin <- 1000 num_mcmc <- 1000 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_root <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_burnin+1):num_samples plot(rowMeans(bcf_model_root$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_root$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_root$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 0.95"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"demo-2-linear-outcome-model-heterogeneous-treatment-effect","dir":"Articles","previous_headings":"","what":"Demo 2: Linear Outcome Model, Heterogeneous Treatment Effect","title":"Causal Machine Learning in StochTree","text":"consider following data generating process Hahn, Murray, Carvalho (2020): \\[\\begin{equation*} \\begin{aligned} y &= \\mu(X) + \\tau(X) Z + \\epsilon\\\\ \\epsilon &\\sim 
N\\left(0,\\sigma^2\\right)\\\\ \\mu(X) &= 1 + g(X) + 6 X_1 X_3\\\\ \\tau(X) &= 1 + 2 X_2 X_4\\\\ g(X) &= \\mathbb{}(X_5=1) \\times 2 - \\mathbb{}(X_5=2) \\times 1 - \\mathbb{}(X_5=3) \\times 4\\\\ X_1,X_2,X_3 &\\sim N\\left(0,1\\right)\\\\ X_4 &\\sim \\text{Bernoulli}(1/2)\\\\ X_5 &\\sim \\text{Categorical}(1/3,1/3,1/3)\\\\ \\end{aligned} \\end{equation*}\\]","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"simulation-1","dir":"Articles","previous_headings":"Demo 2: Linear Outcome Model, Heterogeneous Treatment Effect","what":"Simulation","title":"Causal Machine Learning in StochTree","text":"draw DGP defined ","code":"n <- 500 snr <- 3 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) mu_x <- mu2(X) tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x y <- E_XZ + rnorm(n, 0, 1)*(sd(E_XZ)/snr) # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"warmstart-1","dir":"Articles","previous_headings":"Demo 2: Linear Outcome Model, Heterogeneous Treatment Effect > Sampling and Analysis","what":"Warmstart","title":"Causal Machine Learning in StochTree","text":"first simulate ensemble model \\(y \\mid X\\) using “warm-start” initialization samples (Krantsevich, , 
Hahn (2023)). default stochtree. Inspect BART samples initialized XBART warm-start Examine test set interval coverage","code":"num_gfr <- 10 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_warmstart <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_gfr+1):num_samples plot(rowMeans(bcf_model_warmstart$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_warmstart$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_warmstart$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 0.64"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"bart-mcmc-without-warmstart-1","dir":"Articles","previous_headings":"Demo 2: Linear Outcome Model, Heterogeneous Treatment Effect > Sampling and Analysis","what":"BART MCMC without Warmstart","title":"Causal Machine Learning in StochTree","text":"Next, simulate ensemble model without warm-start initialization. 
Inspect BART samples burnin Examine test set interval coverage","code":"num_gfr <- 0 num_burnin <- 100 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_root <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_burnin+1):num_samples plot(rowMeans(bcf_model_root$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_root$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_root$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 0.87"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"demo-3-linear-outcome-model-homogeneous-treatment-effect","dir":"Articles","previous_headings":"","what":"Demo 3: Linear Outcome Model, Homogeneous Treatment Effect","title":"Causal Machine Learning in StochTree","text":"consider following data generating process Hahn, Murray, Carvalho (2020): \\[\\begin{equation*} \\begin{aligned} y &= \\mu(X) + \\tau(X) Z + \\epsilon\\\\ \\epsilon &\\sim N\\left(0,\\sigma^2\\right)\\\\ 
\\mu(X) &= 1 + g(X) + 6 X_1 X_3\\\\ \\tau(X) &= 3\\\\ g(X) &= \\mathbb{}(X_5=1) \\times 2 - \\mathbb{}(X_5=2) \\times 1 - \\mathbb{}(X_5=3) \\times 4\\\\ X_1,X_2,X_3 &\\sim N\\left(0,1\\right)\\\\ X_4 &\\sim \\text{Bernoulli}(1/2)\\\\ X_5 &\\sim \\text{Categorical}(1/3,1/3,1/3)\\\\ \\end{aligned} \\end{equation*}\\]","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"simulation-2","dir":"Articles","previous_headings":"Demo 3: Linear Outcome Model, Homogeneous Treatment Effect","what":"Simulation","title":"Causal Machine Learning in StochTree","text":"draw DGP defined ","code":"n <- 500 snr <- 3 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) mu_x <- mu2(X) tau_x <- tau1(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x y <- E_XZ + rnorm(n, 0, 1)*(sd(E_XZ)/snr) # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"warmstart-2","dir":"Articles","previous_headings":"Demo 3: Linear Outcome Model, Homogeneous Treatment Effect > Sampling and Analysis","what":"Warmstart","title":"Causal Machine Learning in StochTree","text":"first simulate ensemble model \\(y \\mid X\\) using “warm-start” initialization samples (Krantsevich, , Hahn (2023)). default stochtree. 
Inspect BART samples initialized XBART warm-start Examine test set interval coverage","code":"num_gfr <- 10 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_warmstart <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_gfr+1):num_samples plot(rowMeans(bcf_model_warmstart$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_warmstart$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_warmstart$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 1"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"bart-mcmc-without-warmstart-2","dir":"Articles","previous_headings":"Demo 3: Linear Outcome Model, Homogeneous Treatment Effect > Sampling and Analysis","what":"BART MCMC without Warmstart","title":"Causal Machine Learning in StochTree","text":"Next, simulate ensemble model without warm-start initialization. 
Inspect BART samples burnin Examine test set interval coverage","code":"num_gfr <- 0 num_burnin <- 100 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_root <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_burnin+1):num_samples plot(rowMeans(bcf_model_root$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_root$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_root$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 1"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"demo-4-nonlinear-outcome-model-heterogeneous-treatment-effect","dir":"Articles","previous_headings":"","what":"Demo 4: Nonlinear Outcome Model, Heterogeneous Treatment Effect","title":"Causal Machine Learning in StochTree","text":"consider following data generating process: \\[\\begin{equation*} \\begin{aligned} y &= \\mu(X) + \\tau(X) Z + \\epsilon\\\\ \\epsilon &\\sim N\\left(0,\\sigma^2\\right)\\\\ \\mu(X) &= 
\\begin{cases} -1.1 & \\text{ } X_1 > X_2\\\\ 0.9 & \\text{ } X_1 \\leq X_2 \\end{cases}\\\\ \\tau(X) &= \\frac{1}{1+\\exp(-X_3)} + \\frac{X_2}{10}\\\\ \\pi(X) &= \\Phi\\left(\\mu(X)\\right)\\\\ Z &\\sim \\text{Bernoulli}\\left(\\pi(X)\\right)\\\\ X_1,X_2,X_3 &\\sim N\\left(0,1\\right)\\\\ X_4 &\\sim N\\left(X_2,1\\right)\\\\ \\end{aligned} \\end{equation*}\\]","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"simulation-3","dir":"Articles","previous_headings":"Demo 4: Nonlinear Outcome Model, Heterogeneous Treatment Effect","what":"Simulation","title":"Causal Machine Learning in StochTree","text":"draw DGP defined ","code":"n <- 1000 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- rnorm(n,x2,1) X <- cbind(x1,x2,x3,x4) p <- ncol(X) mu <- function(x) {-1*(x[,1]>(x[,2])) + 1*(x[,1]<(x[,2])) - 0.1} tau <- function(x) {1/(1 + exp(-x[,3])) + x[,2]/10} mu_x <- mu(X) tau_x <- tau(X) pi_x <- pnorm(mu_x) Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x sigma <- diff(range(mu_x + tau_x*pi))/8 y <- E_XZ + sigma*rnorm(n) # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"warmstart-3","dir":"Articles","previous_headings":"Demo 4: Nonlinear Outcome Model, Heterogeneous Treatment Effect > Sampling and Analysis","what":"Warmstart","title":"Causal Machine Learning in StochTree","text":"first simulate ensemble model \\(y \\mid X\\) using “warm-start” 
initialization samples (Krantsevich, , Hahn (2023)). default stochtree. Inspect BART samples initialized XBART warm-start Examine test set interval coverage","code":"num_gfr <- 10 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_warmstart <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_gfr+1):num_samples plot(rowMeans(bcf_model_warmstart$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_warmstart$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_warmstart$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 1"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"bart-mcmc-without-warmstart-3","dir":"Articles","previous_headings":"Demo 4: Nonlinear Outcome Model, Heterogeneous Treatment Effect > Sampling and Analysis","what":"BART MCMC without Warmstart","title":"Causal Machine Learning in StochTree","text":"Next, simulate ensemble model without warm-start 
initialization. Inspect BART samples burnin Examine test set interval coverage","code":"num_gfr <- 0 num_burnin <- 100 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_root <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_burnin+1):num_samples plot(rowMeans(bcf_model_root$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_root$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_root$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 0.97"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"demo-5-nonlinear-outcome-model-heterogeneous-treatment-effect-with-additive-random-effects","dir":"Articles","previous_headings":"","what":"Demo 5: Nonlinear Outcome Model, Heterogeneous Treatment Effect with Additive Random Effects","title":"Causal Machine Learning in StochTree","text":"augment simulated example Demo 1 additive random effect structure show bcf() function can estimate incorporate effects 
forest sampling procedure.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"simulation-4","dir":"Articles","previous_headings":"Demo 5: Nonlinear Outcome Model, Heterogeneous Treatment Effect with Additive Random Effects","what":"Simulation","title":"Causal Machine Learning in StochTree","text":"draw augmented “demo 1” DGP","code":"n <- 500 snr <- 3 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) mu_x <- mu1(X) tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1),nrow=2,byrow=T) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] group_ids_test <- group_ids[test_inds] group_ids_train <- group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] rfx_term_train <- rfx_term[train_inds]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"warmstart-4","dir":"Articles","previous_headings":"Demo 5: Nonlinear Outcome Model, Heterogeneous Treatment Effect with Additive Random Effects > Sampling and 
Analysis","what":"Warmstart","title":"Causal Machine Learning in StochTree","text":"simulate “warm-start” model (running root-MCMC BART random effects simply matter modifying code snippet setting num_gfr <- 0 num_mcmc > 0). Inspect BART samples initialized XBART warm-start Examine test set interval coverage clear causal inference much difficult presence strong covariate-dependent prognostic effects strong group-level random effects. sense, proper prior calibration three \\(\\mu\\), \\(\\tau\\) random effects models crucial.","code":"num_gfr <- 100 num_burnin <- 0 num_mcmc <- 500 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_warmstart <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, group_ids_train = group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, rfx_basis_test = rfx_basis_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = T, sample_sigma_leaf_tau = F ) sample_inds <- (num_gfr+1):num_samples plot(rowMeans(bcf_model_warmstart$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_warmstart$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_warmstart$y_hat_test[,sample_inds]), y_test, xlab = \"predicted\", ylab = \"actual\", main = \"Outcome\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_warmstart$rfx_preds_test[,sample_inds]), rfx_term_test, xlab = \"predicted\", ylab = \"actual\", main = \"Random effects terms\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ-rfx_term) plot_bounds <- c(min(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed)), 
max(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_warmstart$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 0.92"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"motivation","dir":"Articles","previous_headings":"","what":"Motivation","title":"Prototype Interface in StochTree","text":"functions bart() bcf() provide simple performant interfaces supervised learning / causal inference, stochtree also offers access many “low-level” data structures typically implemented C++. low-level interface designed performance even simplicity — rather intent provide “prototype” interface C++ code doesn’t require modifying C++. 
illustrate prototype interface might useful, consider classic BART algorithm:    INPUT: \\(y\\), \\(X\\), \\(\\tau\\), \\(\\nu\\), \\(\\lambda\\), \\(\\alpha\\), \\(\\beta\\)    OUTPUT: \\(m\\) samples decision forest \\(k\\) trees global variance parameter \\(\\sigma^2\\)    Initialize \\(\\sigma^2\\) via default data-dependent calibration exercise    Initialize “forest 0” \\(k\\) trees single root node, referring tree \\(j\\)’s prediction vector \\(f_{0,j}\\)    Compute residual \\(r = y - \\sum_{j=1}^k f_{0,j}\\)    \\(\\) \\(\\left\\{1,\\dots,m\\right\\}\\):       Initialize forest \\(\\) forest \\(-1\\)       \\(j\\) \\(\\left\\{1,\\dots,k\\right\\}\\):          Add predictions tree \\(j\\) residual: \\(r = r + f_{,j}\\)          Update tree \\(j\\) via Metropolis-Hastings \\(r\\) \\(X\\) data tree priors depending (\\(\\tau\\), \\(\\sigma^2\\), \\(\\alpha\\), \\(\\beta\\))          Sample leaf node parameters tree \\(j\\) via Gibbs (leaf node prior \\(N\\left(0,\\tau\\right)\\))          Subtract (updated) predictions tree \\(j\\) residual: \\(r = r - f_{,j}\\)       Sample \\(\\sigma^2\\) via Gibbs (prior \\(IG(\\nu/2,\\nu\\lambda/2)\\)) algorithm conceptually simple, much core computation carried low-level languages C C++ tree data structure. result, changes algorithm, supporting heteroskedasticity (Pratola et al. (2020)), categorical outcomes (Murray (2021)) causal effect estimation (Hahn, Murray, Carvalho (2020)) require modifying low-level code. prototype interface exposes core components loop R level, thus making possible interchange C++ computation steps like “update tree \\(j\\) via Metropolis-Hastings” R computation custom variance model, user-specified additive mean model components, . 
begin, load stochtree package","code":"library(stochtree)"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"simulation","dir":"Articles","previous_headings":"Demo 1: Supervised Learning","what":"Simulation","title":"Prototype Interface in StochTree","text":"Simulate simple partitioned linear model","code":"# Generate the data n <- 500 p_X <- 10 p_W <- 1 X <- matrix(runif(n*p_X), ncol = p_X) W <- matrix(runif(n*p_W), ncol = p_W) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-3*W[,1]) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-1*W[,1]) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (1*W[,1]) + ((0.75 <= X[,1]) & (1 > X[,1])) * (3*W[,1]) ) y <- f_XW + rnorm(n, 0, 1) # Standardize outcome y_bar <- mean(y) y_std <- sd(y) resid <- (y-y_bar)/y_std"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"sampling","dir":"Articles","previous_headings":"Demo 1: Supervised Learning","what":"Sampling","title":"Prototype Interface in StochTree","text":"Set parameters inform forest variance parameter samplers Initialize R-level access C++ classes needed sample model Prepare run sampler Run grow--root sampler “warm-start” BART Pick last GFR forest (associated global variance / leaf scale parameters) MCMC sampler Predict rescale samples","code":"alpha <- 0.9 beta <- 1.25 min_samples_leaf <- 1 num_trees <- 100 cutpoint_grid_size = 100 global_variance_init = 1. tau_init = 0.5 leaf_prior_scale = matrix(c(tau_init), ncol = 1) nu <- 4 lambda <- 0.5 a_leaf <- 2. 
b_leaf <- 0.5 leaf_regression <- T feature_types <- as.integer(rep(0, p_X)) # 0 = numeric var_weights <- rep(1/p_X, p_X) # Data if (leaf_regression) { forest_dataset <- createForestDataset(X, W) outcome_model_type <- 1 } else { forest_dataset <- createForestDataset(X) outcome_model_type <- 0 } outcome <- createOutcome(resid) # Random number generator (std::mt19937) rng <- createRNG() # Sampling data structures forest_model <- createForestModel(forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf) # Container of forest samples if (leaf_regression) { forest_samples <- createForestContainer(num_trees, 1, F) } else { forest_samples <- createForestContainer(num_trees, 1, T) } num_warmstart <- 10 num_mcmc <- 100 num_samples <- num_warmstart + num_mcmc global_var_samples <- c(global_variance_init, rep(0, num_samples)) leaf_scale_samples <- c(tau_init, rep(0, num_samples)) for (i in 1:num_warmstart) { # Sample forest forest_model$sample_one_iteration( forest_dataset, outcome, forest_samples, rng, feature_types, outcome_model_type, leaf_prior_scale, var_weights, global_var_samples[i], cutpoint_grid_size, gfr = T ) # Sample global variance parameter global_var_samples[i+1] <- sample_sigma2_one_iteration( outcome, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` leaf_scale_samples[i+1] <- sample_tau_one_iteration( forest_samples, rng, a_leaf, b_leaf, i-1 ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] } for (i in (num_warmstart+1):num_samples) { # Sample forest forest_model$sample_one_iteration( forest_dataset, outcome, forest_samples, rng, feature_types, outcome_model_type, leaf_prior_scale, var_weights, global_var_samples[i], cutpoint_grid_size, gfr = F ) # Sample global variance parameter global_var_samples[i+1] <- sample_sigma2_one_iteration( outcome, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` leaf_scale_samples[i+1] <- sample_tau_one_iteration( forest_samples, rng, 
a_leaf, b_leaf, i-1 ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] } # Forest predictions preds <- forest_samples$predict(forest_dataset)*y_std + y_bar # Global error variance sigma_samples <- sqrt(global_var_samples)*y_std"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"results","dir":"Articles","previous_headings":"Demo 1: Supervised Learning","what":"Results","title":"Prototype Interface in StochTree","text":"Inspect initial samples obtained via “grow--root” (Hahn (2023)) Inspect BART samples obtained “warm-starting”","code":"plot(sigma_samples[1:num_warmstart], ylab=\"sigma\") plot(rowMeans(preds[,1:num_warmstart]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(sigma_samples[(num_warmstart+1):num_samples], ylab=\"sigma\") plot(rowMeans(preds[,(num_warmstart+1):num_samples]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"demo-2-supervised-learning-with-additive-random-effects","dir":"Articles","previous_headings":"","what":"Demo 2: Supervised Learning with Additive Random Effects","title":"Prototype Interface in StochTree","text":"build example add simple “random effects” structure: every observation either group 1 group 2 random group intercept (simulated quite strong, underscoring need random effects modeling).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"simulation-1","dir":"Articles","previous_headings":"Demo 2: Supervised Learning with Additive Random Effects","what":"Simulation","title":"Prototype Interface in StochTree","text":"Simulate partitioned linear model simple additive group random effect structure","code":"# Generate the data n <- 500 p_X <- 10 p_W <- 1 X <- matrix(runif(n*p_X), ncol = p_X) W <- matrix(runif(n*p_W), ncol = p_W) group_ids 
<- rep(c(1,2), n %/% 2) rfx_coefs <- c(-5, 5) rfx_basis <- rep(1, n) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-3*W[,1]) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-1*W[,1]) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (1*W[,1]) + ((0.75 <= X[,1]) & (1 > X[,1])) * (3*W[,1]) ) rfx_term <- rfx_coefs[group_ids] * rfx_basis y <- f_XW + rfx_term + rnorm(n, 0, 1) # Standardize outcome y_bar <- mean(y) y_std <- sd(y) resid <- (y-y_bar)/y_std"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"sampling-1","dir":"Articles","previous_headings":"Demo 2: Supervised Learning with Additive Random Effects","what":"Sampling","title":"Prototype Interface in StochTree","text":"Set parameters inform forest variance parameter samplers Set parameters inform random effects samplers Initialize R-level access C++ classes needed sample model Prepare run sampler Run grow--root sampler “warm-start” BART Pick last GFR forest (associated global variance / leaf scale parameters) MCMC sampler Predict rescale samples","code":"alpha <- 0.9 beta <- 1.25 min_samples_leaf <- 1 num_trees <- 100 cutpoint_grid_size = 100 global_variance_init = 1. tau_init = 0.5 leaf_prior_scale = matrix(c(tau_init), ncol = 1) nu <- 4 lambda <- 0.5 a_leaf <- 2. 
b_leaf <- 0.5 leaf_regression <- T feature_types <- as.integer(rep(0, p_X)) # 0 = numeric var_weights <- rep(1/p_X, p_X) alpha_init <- c(1) xi_init <- matrix(c(1,1),1,2) sigma_alpha_init <- matrix(c(1),1,1) sigma_xi_init <- matrix(c(1),1,1) sigma_xi_shape <- 1 sigma_xi_scale <- 1 # Data if (leaf_regression) { forest_dataset <- createForestDataset(X, W) outcome_model_type <- 1 } else { forest_dataset <- createForestDataset(X) outcome_model_type <- 0 } outcome <- createOutcome(resid) # Random number generator (std::mt19937) rng <- createRNG() # Sampling data structures forest_model <- createForestModel(forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf) # Container of forest samples if (leaf_regression) { forest_samples <- createForestContainer(num_trees, 1, F) } else { forest_samples <- createForestContainer(num_trees, 1, T) } # Random effects dataset rfx_basis <- as.matrix(rfx_basis) group_ids <- as.integer(group_ids) rfx_dataset <- createRandomEffectsDataset(group_ids, rfx_basis) # Random effects details num_groups <- length(unique(group_ids)) num_components <- ncol(rfx_basis) # Random effects tracker rfx_tracker <- createRandomEffectsTracker(group_ids) # Random effects model rfx_model <- createRandomEffectsModel(num_components, num_groups) rfx_model$set_working_parameter(alpha_init) rfx_model$set_group_parameters(xi_init) rfx_model$set_working_parameter_cov(sigma_alpha_init) rfx_model$set_group_parameter_cov(sigma_xi_init) rfx_model$set_variance_prior_shape(sigma_xi_shape) rfx_model$set_variance_prior_scale(sigma_xi_scale) # Random effect samples rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) num_warmstart <- 10 num_mcmc <- 100 num_samples <- num_warmstart + num_mcmc global_var_samples <- c(global_variance_init, rep(0, num_samples)) leaf_scale_samples <- c(tau_init, rep(0, num_samples)) for (i in 1:num_warmstart) { # Sample forest forest_model$sample_one_iteration( forest_dataset, outcome, forest_samples, 
rng, feature_types, outcome_model_type, leaf_prior_scale, var_weights, global_var_samples[i], cutpoint_grid_size, gfr = T ) # Sample global variance parameter global_var_samples[i+1] <- sample_sigma2_one_iteration( outcome, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` leaf_scale_samples[i+1] <- sample_tau_one_iteration( forest_samples, rng, a_leaf, b_leaf, i-1 ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] # Sample random effects model rfx_model$sample_random_effect(rfx_dataset, outcome, rfx_tracker, rfx_samples, global_var_samples[i+1], rng) } for (i in (num_warmstart+1):num_samples) { # Sample forest forest_model$sample_one_iteration( forest_dataset, outcome, forest_samples, rng, feature_types, outcome_model_type, leaf_prior_scale, var_weights, global_var_samples[i], cutpoint_grid_size, gfr = F ) # Sample global variance parameter global_var_samples[i+1] <- sample_sigma2_one_iteration( outcome, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` leaf_scale_samples[i+1] <- sample_tau_one_iteration( forest_samples, rng, a_leaf, b_leaf, i-1 ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] # Sample random effects model rfx_model$sample_random_effect(rfx_dataset, outcome, rfx_tracker, rfx_samples, global_var_samples[i+1], rng) } # Forest predictions forest_preds <- forest_samples$predict(forest_dataset)*y_std + y_bar # Random effects predictions rfx_preds <- rfx_samples$predict(group_ids, rfx_basis)*y_std # Overall predictions preds <- forest_preds + rfx_preds # Global error variance sigma_samples <- sqrt(global_var_samples)*y_std"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"results-1","dir":"Articles","previous_headings":"Demo 2: Supervised Learning with Additive Random Effects","what":"Results","title":"Prototype Interface in StochTree","text":"Inspect initial samples obtained via grow--root additive random effects model Inspect BART 
samples obtained “warm-starting” plus additive random effects model Now inspect samples BART forest alone (without considering random effect predictions)","code":"plot(sigma_samples[1:num_warmstart], ylab=\"sigma\") plot(rowMeans(preds[,1:num_warmstart]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(sigma_samples[(num_warmstart+1):num_samples], ylab=\"sigma\") plot(rowMeans(preds[,(num_warmstart+1):num_samples]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(rowMeans(forest_preds[,(num_warmstart+1):num_samples]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"demo-3-supervised-learning-with-additive-multi-component-random-effects","dir":"Articles","previous_headings":"","what":"Demo 3: Supervised Learning with Additive Multi-Component Random Effects","title":"Prototype Interface in StochTree","text":"build example, case allowing random intercept regression coefficient (pre-specified basis) group (1 2).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"simulation-2","dir":"Articles","previous_headings":"Demo 3: Supervised Learning with Additive Multi-Component Random Effects","what":"Simulation","title":"Prototype Interface in StochTree","text":"Simulate partitioned linear model simple additive group random effect structure","code":"# Generate the data n <- 500 p_X <- 10 p_W <- 1 X <- matrix(runif(n*p_X), ncol = p_X) W <- matrix(runif(n*p_W), ncol = p_W) group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-5, -3, 5, 3),nrow=2,byrow=T) rfx_basis <- cbind(1, runif(n, -1, 1)) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-3*W[,1]) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-1*W[,1]) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (1*W[,1]) + ((0.75 <= X[,1]) & (1 > X[,1])) * 
(3*W[,1]) ) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) y <- f_XW + rfx_term + rnorm(n, 0, 1) # Standardize outcome y_bar <- mean(y) y_std <- sd(y) resid <- (y-y_bar)/y_std"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"sampling-2","dir":"Articles","previous_headings":"Demo 3: Supervised Learning with Additive Multi-Component Random Effects","what":"Sampling","title":"Prototype Interface in StochTree","text":"Set parameters inform forest variance parameter samplers Set parameters inform random effects samplers Initialize R-level access C++ classes needed sample model Prepare run sampler Run grow--root sampler “warm-start” BART Pick last GFR forest (associated global variance / leaf scale parameters) MCMC sampler Predict rescale samples","code":"alpha <- 0.9 beta <- 1.25 min_samples_leaf <- 1 num_trees <- 100 cutpoint_grid_size = 100 global_variance_init = 1. tau_init = 0.5 leaf_prior_scale = matrix(c(tau_init), ncol = 1) nu <- 4 lambda <- 0.5 a_leaf <- 2. 
b_leaf <- 0.5 leaf_regression <- T feature_types <- as.integer(rep(0, p_X)) # 0 = numeric var_weights <- rep(1/p_X, p_X) alpha_init <- c(1,0) xi_init <- matrix(c(1,0,1,0),2,2) sigma_alpha_init <- diag(1,2,2) sigma_xi_init <- diag(1,2,2) sigma_xi_shape <- 1 sigma_xi_scale <- 1 # Data if (leaf_regression) { forest_dataset <- createForestDataset(X, W) outcome_model_type <- 1 } else { forest_dataset <- createForestDataset(X) outcome_model_type <- 0 } outcome <- createOutcome(resid) # Random number generator (std::mt19937) rng <- createRNG() # Sampling data structures forest_model <- createForestModel(forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf) # Container of forest samples if (leaf_regression) { forest_samples <- createForestContainer(num_trees, 1, F) } else { forest_samples <- createForestContainer(num_trees, 1, T) } # Random effects dataset rfx_basis <- as.matrix(rfx_basis) group_ids <- as.integer(group_ids) rfx_dataset <- createRandomEffectsDataset(group_ids, rfx_basis) # Random effects details num_groups <- length(unique(group_ids)) num_components <- ncol(rfx_basis) # Random effects tracker rfx_tracker <- createRandomEffectsTracker(group_ids) # Random effects model rfx_model <- createRandomEffectsModel(num_components, num_groups) rfx_model$set_working_parameter(alpha_init) rfx_model$set_group_parameters(xi_init) rfx_model$set_working_parameter_cov(sigma_alpha_init) rfx_model$set_group_parameter_cov(sigma_xi_init) rfx_model$set_variance_prior_shape(sigma_xi_shape) rfx_model$set_variance_prior_scale(sigma_xi_scale) # Random effect samples rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) num_warmstart <- 10 num_mcmc <- 100 num_samples <- num_warmstart + num_mcmc global_var_samples <- c(global_variance_init, rep(0, num_samples)) leaf_scale_samples <- c(tau_init, rep(0, num_samples)) for (i in 1:num_warmstart) { # Sample forest forest_model$sample_one_iteration( forest_dataset, outcome, forest_samples, 
rng, feature_types, outcome_model_type, leaf_prior_scale, var_weights, global_var_samples[i], cutpoint_grid_size, gfr = T ) # Sample global variance parameter global_var_samples[i+1] <- sample_sigma2_one_iteration( outcome, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` leaf_scale_samples[i+1] <- sample_tau_one_iteration( forest_samples, rng, a_leaf, b_leaf, i-1 ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] # Sample random effects model rfx_model$sample_random_effect(rfx_dataset, outcome, rfx_tracker, rfx_samples, global_var_samples[i+1], rng) } for (i in (num_warmstart+1):num_samples) { # Sample forest forest_model$sample_one_iteration( forest_dataset, outcome, forest_samples, rng, feature_types, outcome_model_type, leaf_prior_scale, var_weights, global_var_samples[i], cutpoint_grid_size, gfr = F ) # Sample global variance parameter global_var_samples[i+1] <- sample_sigma2_one_iteration( outcome, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` leaf_scale_samples[i+1] <- sample_tau_one_iteration( forest_samples, rng, a_leaf, b_leaf, i-1 ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] # Sample random effects model rfx_model$sample_random_effect(rfx_dataset, outcome, rfx_tracker, rfx_samples, global_var_samples[i+1], rng) } # Forest predictions forest_preds <- forest_samples$predict(forest_dataset)*y_std + y_bar # Random effects predictions rfx_preds <- rfx_samples$predict(group_ids, rfx_basis)*y_std # Overall predictions preds <- forest_preds + rfx_preds # Global error variance sigma_samples <- sqrt(global_var_samples)*y_std"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"results-2","dir":"Articles","previous_headings":"Demo 3: Supervised Learning with Additive Multi-Component Random Effects","what":"Results","title":"Prototype Interface in StochTree","text":"Inspect initial samples obtained via grow--root additive random effects 
model Inspect BART samples obtained “warm-starting” plus additive random effects model Now inspect samples BART forest alone (without considering random effect predictions)","code":"plot(sigma_samples[1:num_warmstart], ylab=\"sigma\") plot(rowMeans(preds[,1:num_warmstart]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(sigma_samples[(num_warmstart+1):num_samples], ylab=\"sigma\") plot(rowMeans(preds[,(num_warmstart+1):num_samples]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(rowMeans(forest_preds[,(num_warmstart+1):num_samples]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"demo-4-causal-inference","dir":"Articles","previous_headings":"","what":"Demo 4: Causal Inference","title":"Prototype Interface in StochTree","text":"show implement Bayesian Causal Forest (BCF) model Hahn, Murray, Carvalho (2020) using stochtree’s prototype API, including demoing non-trivial sampling step done R level.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"background","dir":"Articles","previous_headings":"Demo 4: Causal Inference","what":"Background","title":"Prototype Interface in StochTree","text":"supervised learning case previous demo conceptually simple, motivate causal effect estimation task additional notation. Let \\(y\\) refer continuous outcome interest, \\(Z\\) refer binary treatment, \\(X\\) set covariates may influence \\(Y\\), \\(Z\\), . 
\\(X\\) exhaustive set covariates influence \\(Z\\) \\(Y\\), can specific \\(Y\\) terms causal model (see example Pearl (2009)) \\[\\begin{equation*} \\begin{aligned} Y &= F(Z, X, \\epsilon_Y) \\end{aligned} \\end{equation*}\\] \\(\\epsilon_Y\\) outcome specific random noise \\(F\\) function generates \\(Y\\) (many cases, \\(F\\) can thought inverse CDF conditional \\(X\\) \\(Z\\)). “potential outcomes” (see Imbens Rubin (2015)) can recovered \\(Y^1 = F(1, X, \\epsilon_Y)\\) \\(Y^0 = F(0, X, \\epsilon_Y)\\). causal outcome model can decomposed “mean” “error” terms \\[\\begin{equation*} \\begin{aligned} Y &= \\mu(X) + Z\\tau(X) + \\left[\\eta(X) + Z\\delta(X)\\right]\\\\ \\mu(X) &= \\mathbb{E}_{\\epsilon_Y}\\left[F(0, X, \\epsilon_Y)\\right]\\\\ \\tau(X) &= \\mathbb{E}_{\\epsilon_Y}\\left[F(1, X, \\epsilon_Y) - F(0, X, \\epsilon_Y)\\right]\\\\ \\eta(X) &= F(0, X, \\epsilon_Y) - \\mathbb{E}_{\\epsilon_Y}\\left[F(0, X, \\epsilon_Y)\\right]\\\\ \\delta(X) &= F(1, X, \\epsilon_Y) - F(0, X, \\epsilon_Y) - \\mathbb{E}_{\\epsilon_Y}\\left[F(1, X, \\epsilon_Y) - F(0, X, \\epsilon_Y)\\right] \\end{aligned} \\end{equation*}\\] \\(\\tau(X)\\) precisely conditional average treatment effect (CATE) estimand. Unfortunately, functional form \\(F\\) unavailable analysis, \\(\\tau(X)\\) derived. flexible, regularized nonparametrics enter picture, aim estimate \\(\\mu(X)\\) \\(\\tau(X)\\) data.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"bayesian-causal-forest-bcf","dir":"Articles","previous_headings":"Demo 4: Causal Inference > Background","what":"Bayesian Causal Forest (BCF)","title":"Prototype Interface in StochTree","text":"BCF estimates \\(\\mu(X)\\) \\(\\tau(X)\\) using separate BART forests term. Furthermore, rather rely common implicit coding \\(Z\\) 0 control observations 1 treated observations, consider coding control observations parameter \\(b_0\\) treated observations parameter \\(b_1\\). 
Placing \\(N(0,1/2)\\) prior \\(b_z\\), essentially redefines outcome model \\[\\begin{equation*} \\begin{aligned} y &= \\mu(X) + \\tau(X) f(Z) + \\epsilon\\\\ f(Z) &= b_0(1-Z) + b_1 Z\\\\ \\epsilon &\\sim N\\left(0, \\sigma^2\\right)\\\\ b_0, b_1 &\\sim N\\left(0, 1/2\\right) \\end{aligned} \\end{equation*}\\] Updating \\(b_z\\) requires additional Gibbs step, derive . Conditioning sampled forests \\(\\mu\\) \\(\\tau\\), essentially regressing \\(y - \\mu(X)\\) \\(\\left[(1-Z)\\tau(X), Z\\tau(X)\\right]\\) closed form posterior \\[\\begin{equation*} \\begin{aligned} b_0 \\mid y, X, \\mu,\\tau &\\sim N\\left(\\frac{s_{y\\tau,0}}{s_{\\tau\\tau,0} + 2\\sigma^2}, \\frac{\\sigma^2}{s_{\\tau\\tau,0} + 2\\sigma^2}\\right)\\\\ b_1 \\mid y, X, \\mu,\\tau &\\sim N\\left(\\frac{s_{y\\tau,1}}{s_{\\tau\\tau,1} + 2\\sigma^2}, \\frac{\\sigma^2}{s_{\\tau\\tau,1} + 2\\sigma^2}\\right) \\end{aligned} \\end{equation*}\\] \\(s_{y\\tau,z} = \\sum_{i: Z_i = z} (y_i - \\mu(X_i))\\tau(X_i)\\) \\(s_{\\tau\\tau,z} = \\sum_{i: Z_i = z} \\tau(X_i)\\tau(X_i)\\).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"simulation-3","dir":"Articles","previous_headings":"Demo 4: Causal Inference","what":"Simulation","title":"Prototype Interface in StochTree","text":"simulated causal DGP mirrors nonlinear, heterogeneous treatment effect DGP presented Hahn, Murray, Carvalho (2020).","code":"n <- 500 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) g <- function(x) {ifelse(x[,5]==1,2,ifelse(x[,5]==2,-1,4))} mu1 <- function(x) {1+g(x)+x[,1]*x[,3]} mu2 <- function(x) {1+g(x)+6*abs(x[,3]-1)} tau1 <- function(x) {rep(3,nrow(x))} tau2 <- function(x) {1+2*x[,2]*x[,4]} mu_x <- mu1(X) tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 4 y <- E_XZ + rnorm(n, 0, 
1)*(sd(E_XZ)/snr) # Standardize outcome y_bar <- mean(y) y_std <- sd(y) resid <- (y-y_bar)/y_std"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"sampling-3","dir":"Articles","previous_headings":"Demo 4: Causal Inference","what":"Sampling","title":"Prototype Interface in StochTree","text":"Set parameters inform forest variance parameter samplers Prepare run sampler (now must specify initial values \\(b_0\\) \\(b_1\\), choose -1/2 1/2 instead 0 1). Initialize R-level access C++ classes needed sample model Run grow--root sampler “warm-start” BART, also updating adaptive coding parameter \\(b_0\\) \\(b_1\\) Pick last GFR forest (associated global variance / leaf scale parameters) MCMC sampler Predict rescale samples","code":"# Mu forest alpha_mu <- 0.95 beta_mu <- 2.0 min_samples_leaf_mu <- 5 num_trees_mu <- 250 cutpoint_grid_size_mu = 100 tau_init_mu = 1/num_trees_mu leaf_prior_scale_mu = matrix(c(tau_init_mu), ncol = 1) a_leaf_mu <- 3. b_leaf_mu <- var(resid)/(num_trees_mu) leaf_regression_mu <- F sigma_leaf_mu <- var(resid)/(num_trees_mu) current_leaf_scale_mu <- as.matrix(sigma_leaf_mu) # Tau forest alpha_tau <- 0.25 beta_tau <- 3.0 min_samples_leaf_tau <- 5 num_trees_tau <- 50 cutpoint_grid_size_tau = 100 a_leaf_tau <- 3. 
b_leaf_tau <- var(resid)/(2*num_trees_tau) leaf_regression_tau <- T sigma_leaf_tau <- var(resid)/(2*num_trees_tau) current_leaf_scale_tau <- as.matrix(sigma_leaf_tau) # Common parameters nu <- 3 sigma2hat <- (sigma(lm(resid~X)))^2 quantile_cutoff <- 0.9 if (is.null(lambda)) { lambda <- (sigma2hat*qgamma(1-quantile_cutoff,nu))/nu } sigma2 <- sigma2hat current_sigma2 <- sigma2 # Sampling composition num_gfr <- 20 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc # Sigma^2 samples global_var_samples <- rep(0, num_samples) # Adaptive coding parameter samples b_0_samples <- rep(0, num_samples) b_1_samples <- rep(0, num_samples) b_0 <- -0.5 b_1 <- 0.5 current_b_0 <- b_0 current_b_1 <- b_1 tau_basis <- (1-Z)*current_b_0 + Z*current_b_1 # Data X_mu <- cbind(X, pi_x) X_tau <- X feature_types <- c(0,0,0,1,1) feature_types_mu <- as.integer(c(feature_types,0)) feature_types_tau <- as.integer(feature_types) variable_weights_mu = rep(1/ncol(X_mu), ncol(X_mu)) variable_weights_tau = rep(1/ncol(X_tau), ncol(X_tau)) forest_dataset_mu <- createForestDataset(X_mu) forest_dataset_tau <- createForestDataset(X_tau, tau_basis) outcome <- createOutcome(resid) # Random number generator (std::mt19937) rng <- createRNG() # Sampling data structures forest_model_mu <- createForestModel( forest_dataset_mu, feature_types_mu, num_trees_mu, nrow(X_mu), alpha_mu, beta_mu, min_samples_leaf_mu ) forest_model_tau <- createForestModel( forest_dataset_tau, feature_types_tau, num_trees_tau, nrow(X_tau), alpha_tau, beta_tau, min_samples_leaf_tau ) # Container of forest samples forest_samples_mu <- createForestContainer(num_trees_mu, 1, T) forest_samples_tau <- createForestContainer(num_trees_tau, 1, F) # Initialize the leaves of each tree in the prognostic forest forest_samples_mu$set_root_leaves(0, mean(resid) / num_trees_mu) forest_samples_mu$update_residual( forest_dataset_mu, outcome, forest_model_mu, F, 0, F ) # Initialize the leaves of each tree in the treatment effect 
forest forest_samples_tau$set_root_leaves(0, 0.) forest_samples_tau$update_residual( forest_dataset_tau, outcome, forest_model_tau, T, 0, F ) if (num_gfr > 0){ for (i in 1:num_gfr) { # Sample the prognostic forest forest_model_mu$sample_one_iteration( forest_dataset_mu, outcome, forest_samples_mu, rng, feature_types_mu, 0, current_leaf_scale_mu, variable_weights_mu, current_sigma2, cutpoint_grid_size, gfr = T, pre_initialized = T ) # Sample variance parameters (if requested) global_var_samples[i] <- sample_sigma2_one_iteration( outcome, rng, nu, lambda ) current_sigma2 <- global_var_samples[i] # Sample the treatment forest forest_model_tau$sample_one_iteration( forest_dataset_tau, outcome, forest_samples_tau, rng, feature_types_tau, 1, current_leaf_scale_tau, variable_weights_tau, current_sigma2, cutpoint_grid_size, gfr = T, pre_initialized = T ) # Sample adaptive coding parameters mu_x_raw <- forest_samples_mu$predict_raw_single_forest(forest_dataset_mu, i-1) tau_x_raw <- forest_samples_tau$predict_raw_single_forest(forest_dataset_tau, i-1) s_tt0 <- sum(tau_x_raw*tau_x_raw*(Z==0)) s_tt1 <- sum(tau_x_raw*tau_x_raw*(Z==1)) partial_resid_mu <- resid - mu_x_raw s_ty0 <- sum(tau_x_raw*partial_resid_mu*(Z==0)) s_ty1 <- sum(tau_x_raw*partial_resid_mu*(Z==1)) current_b_0 <- rnorm(1, (s_ty0/(s_tt0 + 2*current_sigma2)), sqrt(current_sigma2/(s_tt0 + 2*current_sigma2))) current_b_1 <- rnorm(1, (s_ty1/(s_tt1 + 2*current_sigma2)), sqrt(current_sigma2/(s_tt1 + 2*current_sigma2))) tau_basis <- (1-Z)*current_b_0 + Z*current_b_1 forest_dataset_tau$update_basis(tau_basis) b_0_samples[i] <- current_b_0 b_1_samples[i] <- current_b_1 # Sample variance parameters (if requested) global_var_samples[i] <- sample_sigma2_one_iteration(outcome, rng, nu, lambda) current_sigma2 <- global_var_samples[i] } } if (num_burnin + num_mcmc > 0) { for (i in (num_gfr+1):num_samples) { # Sample the prognostic forest forest_model_mu$sample_one_iteration( forest_dataset_mu, outcome, forest_samples_mu, rng, 
feature_types_mu, 0, current_leaf_scale_mu, variable_weights_mu, current_sigma2, cutpoint_grid_size, gfr = F, pre_initialized = T ) # Sample global variance parameter global_var_samples[i] <- sample_sigma2_one_iteration(outcome, rng, nu, lambda) current_sigma2 <- global_var_samples[i] # Sample the treatment forest forest_model_tau$sample_one_iteration( forest_dataset_tau, outcome, forest_samples_tau, rng, feature_types_tau, 1, current_leaf_scale_tau, variable_weights_tau, current_sigma2, cutpoint_grid_size, gfr = F, pre_initialized = T ) # Sample coding parameters mu_x_raw <- forest_samples_mu$predict_raw_single_forest(forest_dataset_mu, i-1) tau_x_raw <- forest_samples_tau$predict_raw_single_forest(forest_dataset_tau, i-1) s_tt0 <- sum(tau_x_raw*tau_x_raw*(Z==0)) s_tt1 <- sum(tau_x_raw*tau_x_raw*(Z==1)) partial_resid_mu <- resid - mu_x_raw s_ty0 <- sum(tau_x_raw*partial_resid_mu*(Z==0)) s_ty1 <- sum(tau_x_raw*partial_resid_mu*(Z==1)) current_b_0 <- rnorm(1, (s_ty0/(s_tt0 + 2*current_sigma2)), sqrt(current_sigma2/(s_tt0 + 2*current_sigma2))) current_b_1 <- rnorm(1, (s_ty1/(s_tt1 + 2*current_sigma2)), sqrt(current_sigma2/(s_tt1 + 2*current_sigma2))) tau_basis <- (1-Z)*current_b_0 + Z*current_b_1 forest_dataset_tau$update_basis(tau_basis) b_0_samples[i] <- current_b_0 b_1_samples[i] <- current_b_1 # Sample global variance parameter global_var_samples[i] <- sample_sigma2_one_iteration(outcome, rng, nu, lambda) current_sigma2 <- global_var_samples[i] } } # Forest predictions mu_hat <- forest_samples_mu$predict(forest_dataset_mu)*y_std + y_bar tau_hat_raw <- forest_samples_tau$predict_raw(forest_dataset_tau) tau_hat <- t(t(tau_hat_raw) * (b_1_samples - b_0_samples))*y_std y_hat <- mu_hat + tau_hat * as.numeric(Z) # Global error variance sigma2_samples <- global_var_samples*(y_std^2)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"results-3","dir":"Articles","previous_headings":"Demo 4: Causal 
Inference","what":"Results","title":"Prototype Interface in StochTree","text":"Inspect XBART results Inspect warm start BART results Inspect “adaptive coding” parameters \\(b_0\\) \\(b_1\\).","code":"plot(sigma2_samples[1:num_gfr], ylab=\"sigma^2\") plot(rowMeans(mu_hat[,1:num_gfr]), mu_x, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\", main = \"prognostic term\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(rowMeans(tau_hat[,1:num_gfr]), tau_x, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\", main = \"treatment effect term\") abline(0,1,col=\"red\",lty=2,lwd=2.5) mean((rowMeans(tau_hat[,1:num_gfr]) - tau_x)^2) #> [1] 0.2857801 plot(sigma_samples[(num_gfr+1):num_samples], ylab=\"sigma^2\") plot(rowMeans(mu_hat[,(num_gfr+1):num_samples]), mu_x, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\", main = \"prognostic term\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(rowMeans(tau_hat[,(num_gfr+1):num_samples]), tau_x, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\", main = \"treatment effect term\") abline(0,1,col=\"red\",lty=2,lwd=2.5) mean((rowMeans(tau_hat[,(num_gfr+1):num_samples]) - tau_x)^2) #> [1] 0.3717923 plot(b_0_samples, col = \"blue\", ylab = \"Coding parameter draws\", ylim = c(min(min(b_0_samples), min(b_1_samples)), max(max(b_0_samples), max(b_1_samples)))) points(b_1_samples, col = \"orange\") legend(\"topleft\", legend = c(\"b_0\", \"b_1\"), col = c(\"blue\", \"orange\"), pch = c(1,1))"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Drew Herren. Author, maintainer. Richard Hahn. Author. Jared Murray. Author. Carlos Carvalho. Author. Jingyu He. Author.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Herren D, Hahn R, Murray J, Carvalho C, He J (2024). 
stochtree: Stochastic tree ensembles (XBART BART) supervised learning causal inference. R package version 0.0.0.9000, https://stochastictree.github.io/stochtree-r/.","code":"@Manual{, title = {stochtree: Stochastic tree ensembles (XBART and BART) for supervised learning and causal inference}, author = {Drew Herren and Richard Hahn and Jared Murray and Carlos Carvalho and Jingyu He}, year = {2024}, note = {R package version 0.0.0.9000}, url = {https://stochastictree.github.io/stochtree-r/}, }"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/index.html","id":"getting-started","dir":"","previous_headings":"","what":"Getting started","title":"Stochastic tree ensembles (XBART and BART) for supervised learning and causal inference","text":"package can installed R via","code":"remotes::install_github(\"StochasticTree/stochtree-r\")"},{"path":"https://stochastictree.github.io/stochtree-r/reference/BART.html","id":null,"dir":"Reference","previous_headings":"","what":"Run the BART algorithm for supervised learning. — bart","title":"Run the BART algorithm for supervised learning. — bart","text":"Run BART algorithm supervised learning.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/BART.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Run the BART algorithm for supervised learning. 
— bart","text":"","code":"bart( X_train, y_train, W_train = NULL, group_ids_train = NULL, rfx_basis_train = NULL, X_test = NULL, W_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL, feature_types = rep(0, ncol(X_train)), variable_weights = rep(1/ncol(X_train), ncol(X_train)), cutpoint_grid_size = 100, tau_init = NULL, alpha = 0.95, beta = 2, min_samples_leaf = 5, leaf_model = 0, nu = 3, lambda = NULL, a_leaf = 3, b_leaf = NULL, q = 0.9, sigma2_init = NULL, num_trees = 200, num_gfr = 5, num_burnin = 0, num_mcmc = 100, sample_sigma = T, sample_tau = T, random_seed = -1 )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/BART.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Run the BART algorithm for supervised learning. — bart","text":"X_train Covariates used split trees ensemble. y_train Outcome modeled ensemble. W_train (Optional) Bases used define regression model y ~ W leaf regression tree. default, BART assumes constant leaf node parameters, implicitly regressing constant basis ones (.e. y ~ 1). group_ids_train (Optional) Group labels used additive random effects model. rfx_basis_train (Optional) Basis \"random-slope\" regression additive random effects model. group_ids_train provided regression basis, intercept-random effects model estimated. X_test (Optional) Test set covariates used define \"sample\" evaluation data. W_test (Optional) Test set bases used define \"sample\" evaluation data. test set optional, structure provided test set must match training set (.e. X_train W_train provided, test set must consist X_test W_test number columns). group_ids_test (Optional) Test set group labels used additive random effects model. currently support (plan near future), test set evaluation group labels training set. rfx_basis_test (Optional) Test set basis \"random-slope\" regression additive random effects model. 
feature_types Vector length ncol(X_train) indicating \"type\" covariates (0 = numeric, 1 = ordered categorical, 2 = unordered categorical). Default: rep(0,ncol(X_train)). variable_weights Vector length ncol(X_train) indicating \"weight\" placed variable sampling purposes. Default: rep(1/ncol(X_train),ncol(X_train)). cutpoint_grid_size Maximum size \"grid\" potential cutpoints consider. Default: 100. tau_init Starting value leaf node scale parameter. Calibrated internally 1/num_trees set . alpha Prior probability splitting tree depth 0. Tree split prior combines alpha beta via alpha*(1+node_depth)^-beta. beta Exponent decreases split probabilities nodes depth > 0. Tree split prior combines alpha beta via alpha*(1+node_depth)^-beta. min_samples_leaf Minimum allowable size leaf, terms training samples. Default: 5. leaf_model Integer indicating leaf model, 0 = constant Gaussian prior, 1 = univariate regression Gaussian prior, 2 = multivariate regression Gaussian prior. W_train ignored set 0. Default: 0. nu Shape parameter IG(nu, nu*lambda) global error variance model. Default: 3. lambda Component scale parameter IG(nu, nu*lambda) global error variance prior. specified, calibrated Sparapani et al (2021). a_leaf Shape parameter IG(a_leaf, b_leaf) leaf node parameter variance model. Default: 3. b_leaf Scale parameter IG(a_leaf, b_leaf) leaf node parameter variance model. Calibrated internally 0.5/num_trees set . q Quantile used calibrated lambda Sparapani et al (2021). Default: 0.9. sigma2_init Starting value global variance parameter. Calibrated internally Sparapani et al (2021) set . num_trees Number trees ensemble. Default: 200. num_gfr Number \"warm-start\" iterations run using grow--root algorithm (Hahn, 2021). Default: 5. num_burnin Number \"burn-\" iterations MCMC sampler. Default: 0. num_mcmc Number \"retained\" iterations MCMC sampler. Default: 100. sample_sigma Whether update sigma^2 global error variance parameter based IG(nu, nu*lambda). Default: T. 
sample_tau Whether update tau leaf scale variance parameter based IG(a_leaf, b_leaf). set true leaf_model=2. Default: T. random_seed Integer parameterizing C++ random number generator. specified, C++ random number generator seeded according std::random_device.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/BART.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Run the BART algorithm for supervised learning. — bart","text":"List sampling outputs wrapper around sampled forests (can used -memory prediction new data, serialized JSON disk).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/BART.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Run the BART algorithm for supervised learning. — bart","text":"","code":"n <- 100 p <- 5 X <- matrix(runif(n*p), ncol = p) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) ) noise_sd <- 1 y <- f_XW + rnorm(n, 0, noise_sd) test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test, leaf_model = 0) # plot(rowMeans(bart_model$yhat_test), y_test, xlab = \"predicted\", ylab = \"actual\") # abline(0,1,col=\"red\",lty=3,lwd=3)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/CppRNG.html","id":null,"dir":"Reference","previous_headings":"","what":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","title":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","text":"Persists C++ random number generator throughout R session ensure 
reproducibility given random seed. seed provided, C++ random number generator initialized using std::random_device.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/CppRNG.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","text":"rng_ptr External pointer C++ std::mt19937 class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/CppRNG.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","text":"CppRNG$new()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/CppRNG.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","text":"Create new CppRNG object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/CppRNG.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","text":"","code":"CppRNG$new(random_seed = -1)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/CppRNG.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","text":"random_seed (Optional) random seed sampling","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/CppRNG.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","text":"new CppRNG 
object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":null,"dir":"Reference","previous_headings":"","what":"Dataset used to sample a forest — ForestDataset","title":"Dataset used to sample a forest — ForestDataset","text":"dataset consists three matrices / vectors: covariates, bases, variance weights. basis vector variance weights optional.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Dataset used to sample a forest — ForestDataset","text":"data_ptr External pointer C++ ForestDataset class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Dataset used to sample a forest — ForestDataset","text":"ForestDataset$new() ForestDataset$update_basis() ForestDataset$num_observations() ForestDataset$num_covariates() ForestDataset$num_basis() ForestDataset$has_basis() ForestDataset$has_variance_weights()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Dataset used to sample a forest — ForestDataset","text":"Create new ForestDataset object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a forest — ForestDataset","text":"","code":"ForestDataset$new(covariates, basis = NULL, variance_weights = NULL)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Dataset used to sample a forest — ForestDataset","text":"covariates Matrix 
covariates basis (Optional) Matrix bases used define leaf regression variance_weights (Optional) Vector observation-specific variance weights","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a forest — ForestDataset","text":"new ForestDataset object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"method-update-basis-","dir":"Reference","previous_headings":"","what":"Method update_basis()","title":"Dataset used to sample a forest — ForestDataset","text":"Update basis matrix dataset","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"usage-1","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a forest — ForestDataset","text":"","code":"ForestDataset$update_basis(basis)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"arguments-1","dir":"Reference","previous_headings":"","what":"Arguments","title":"Dataset used to sample a forest — ForestDataset","text":"basis Updated matrix bases used define leaf regression","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"method-num-observations-","dir":"Reference","previous_headings":"","what":"Method num_observations()","title":"Dataset used to sample a forest — ForestDataset","text":"Return number observations ForestDataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"usage-2","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a forest — 
ForestDataset","text":"","code":"ForestDataset$num_observations()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"returns-1","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a forest — ForestDataset","text":"Observation count","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"method-num-covariates-","dir":"Reference","previous_headings":"","what":"Method num_covariates()","title":"Dataset used to sample a forest — ForestDataset","text":"Return number covariates ForestDataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"usage-3","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a forest — ForestDataset","text":"","code":"ForestDataset$num_covariates()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"returns-2","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a forest — ForestDataset","text":"Covariate count","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"method-num-basis-","dir":"Reference","previous_headings":"","what":"Method num_basis()","title":"Dataset used to sample a forest — ForestDataset","text":"Return number bases ForestDataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"usage-4","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a forest — ForestDataset","text":"","code":"ForestDataset$num_basis()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"returns-3","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a forest — ForestDataset","text":"Basis 
count","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"method-has-basis-","dir":"Reference","previous_headings":"","what":"Method has_basis()","title":"Dataset used to sample a forest — ForestDataset","text":"Whether dataset basis matrix","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"usage-5","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a forest — ForestDataset","text":"","code":"ForestDataset$has_basis()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"returns-4","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a forest — ForestDataset","text":"True basis matrix loaded, false otherwise","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"method-has-variance-weights-","dir":"Reference","previous_headings":"","what":"Method has_variance_weights()","title":"Dataset used to sample a forest — ForestDataset","text":"Whether dataset variance weights","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"usage-6","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a forest — ForestDataset","text":"","code":"ForestDataset$has_variance_weights()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"returns-5","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a forest — ForestDataset","text":"True variance weights loaded, false otherwise","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":null,"dir":"Reference","previous_headings":"","what":"Class that defines and samples a forest model — ForestModel","title":"Class that defines and samples a forest model — ForestModel","text":"Hosts C++ data 
structures needed sample ensemble decision trees, exposes functionality run forest sampler (using either MCMC grow--root algorithm).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Class that defines and samples a forest model — ForestModel","text":"tracker_ptr External pointer C++ ForestTracker class tree_prior_ptr External pointer C++ TreePrior class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Class that defines and samples a forest model — ForestModel","text":"ForestModel$new() ForestModel$sample_one_iteration()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Class that defines and samples a forest model — ForestModel","text":"Create new ForestModel object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that defines and samples a forest model — ForestModel","text":"","code":"ForestModel$new( forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that defines and samples a forest model — ForestModel","text":"forest_dataset ForestDataset object, used initialize forest sampling data structures feature_types Feature types (integers 0 = numeric, 1 = ordered categorical, 2 = unordered categorical) num_trees Number trees forest sampled n Number observations forest_dataset alpha Root node split probability tree prior 
beta Depth prior penalty tree prior min_samples_leaf Minimum number samples tree leaf","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that defines and samples a forest model — ForestModel","text":"new ForestModel object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"method-sample-one-iteration-","dir":"Reference","previous_headings":"","what":"Method sample_one_iteration()","title":"Class that defines and samples a forest model — ForestModel","text":"Run single iteration forest sampling algorithm (MCMC GFR)","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"usage-1","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that defines and samples a forest model — ForestModel","text":"","code":"ForestModel$sample_one_iteration( forest_dataset, residual, forest_samples, rng, feature_types, leaf_model_int, leaf_model_scale, variable_weights, global_scale, cutpoint_grid_size = 500, gfr = T, pre_initialized = F )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"arguments-1","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that defines and samples a forest model — ForestModel","text":"forest_dataset Dataset used sample forest residual Outcome used sample forest forest_samples Container forest samples rng Wrapper around C++ random number generator feature_types Vector specifying type p covariates forest_dataset (0 = numeric, 1 = ordered categorical, 2 = unordered categorical) leaf_model_int Integer specifying leaf model type (0 = constant leaf, 1 = univariate leaf regression, 2 = multivariate leaf regression) leaf_model_scale Scale parameter used leaf node model (q x q matrix q dimensionality basis >1 leaf_model_int = 2) variable_weights Vector specifying 
sampling probability p covariates forest_dataset global_scale Global variance parameter cutpoint_grid_size (Optional) Number unique cutpoints consider (default: 500, currently used GFR = TRUE) gfr (Optional) Whether forest sampled using \"grow--root\" (GFR) algorithm pre_initialized (Optional) Whether leaves pre-initialized outside sampling loop (samples drawn). multi-forest implementations like BCF, true, though single-forest supervised learning implementation, can let C++ initialization. Default: F.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":null,"dir":"Reference","previous_headings":"","what":"Class that stores draws from an random ensemble of decision trees — ForestSamples","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Wrapper around C++ container tree ensembles","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"forest_container_ptr External pointer C++ ForestContainer class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"ForestSamples$new() ForestSamples$predict() ForestSamples$predict_raw() ForestSamples$predict_raw_single_forest() ForestSamples$set_root_leaves() ForestSamples$update_residual() ForestSamples$save_json() ForestSamples$load_json() ForestSamples$num_samples() ForestSamples$output_dimension()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method 
new()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Create new ForestContainer object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$new(num_trees, output_dimension = 1, is_leaf_constant = F)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"num_trees Number trees output_dimension Dimensionality outcome model is_leaf_constant Whether leaf constant","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"new ForestContainer object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-predict-","dir":"Reference","previous_headings":"","what":"Method predict()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Predict every tree ensemble every sample forest_dataset","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-1","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$predict(forest_dataset)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments-1","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that 
stores draws from an random ensemble of decision trees — ForestSamples","text":"forest_dataset ForestDataset R class","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"returns-1","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"matrix predictions many rows forest_dataset many columns samples ForestContainer","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-predict-raw-","dir":"Reference","previous_headings":"","what":"Method predict_raw()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Predict \"raw\" leaf values (without multiplied basis) every tree ensemble every sample forest_dataset","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-2","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$predict_raw(forest_dataset)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments-2","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"forest_dataset ForestDataset R class","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"returns-2","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Array predictions observation forest_dataset sample ForestSamples class prediction dimensionality forests' leaf model. case constant leaf model univariate leaf regression, array two-dimensional (number observations, number forest samples). 
case multivariate leaf regression, array three-dimension (number observations, leaf model dimension, number samples).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-predict-raw-single-forest-","dir":"Reference","previous_headings":"","what":"Method predict_raw_single_forest()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Predict \"raw\" leaf values (without multiplied basis) specific forest every sample forest_dataset","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-3","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$predict_raw_single_forest(forest_dataset, forest_num)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments-3","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"forest_dataset ForestDataset R class forest_num Index forest sample within container","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"returns-3","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"matrix predictions many rows forest_dataset many columns samples ForestContainer","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-set-root-leaves-","dir":"Reference","previous_headings":"","what":"Method set_root_leaves()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Set constant predicted value every tree ensemble. 
Stops program tree root node.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-4","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$set_root_leaves(forest_num, leaf_value)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments-4","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"forest_num Index forest sample within container. leaf_value Constant leaf value(s) fixed tree ensemble indexed forest_num. Can either single number vector, depending forest's leaf dimension.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-update-residual-","dir":"Reference","previous_headings":"","what":"Method update_residual()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Updates residual based predictions forest","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-5","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$update_residual( dataset, outcome, forest_model, requires_basis, forest_num, add )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments-5","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"dataset ForestDataset object storing covariates bases given forest outcome Outcome object storing residuals updated based forest predictions forest_model ForestModel object storing tracking 
structures used training / sampling requires_basis Whether forest requires basis prediction forest_num Index forest used update residuals add Whether forest predictions added subtracted residuals","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-save-json-","dir":"Reference","previous_headings":"","what":"Method save_json()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Store trees metadata ForestDataset class json file","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-6","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$save_json(json_filename)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments-6","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"json_filename Name output json file (must end \".json\")","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-load-json-","dir":"Reference","previous_headings":"","what":"Method load_json()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Load trees metadata ensemble json file. 
Note trees metadata already present ForestDataset class overwritten.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-7","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$load_json(json_filename)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments-7","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"json_filename Name model input json file (must end \".json\")","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-num-samples-","dir":"Reference","previous_headings":"","what":"Method num_samples()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Return number samples ForestContainer object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-8","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$num_samples()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"returns-4","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Sample count","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-output-dimension-","dir":"Reference","previous_headings":"","what":"Method output_dimension()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Return output dimension trees ForestContainer 
object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-9","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$output_dimension()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"returns-5","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Leaf node parameter size","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/Outcome.html","id":null,"dir":"Reference","previous_headings":"","what":"Outcome / partial residual used to sample an additive model. — Outcome","title":"Outcome / partial residual used to sample an additive model. — Outcome","text":"outcome class wrapper around vector (mutable) outcomes ML tasks (supervised learning, causal inference). additive tree ensemble sampled, outcome used sample specific model term \"partial residual\" consisting outcome minus predictions every model term (trees, group random effects, etc...).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/Outcome.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Outcome / partial residual used to sample an additive model. — Outcome","text":"data_ptr External pointer C++ Outcome class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/Outcome.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Outcome / partial residual used to sample an additive model. 
— Outcome","text":"Outcome$new()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/Outcome.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Outcome / partial residual used to sample an additive model. — Outcome","text":"Create new Outcome object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/Outcome.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Outcome / partial residual used to sample an additive model. — Outcome","text":"","code":"Outcome$new(outcome)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/Outcome.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Outcome / partial residual used to sample an additive model. — Outcome","text":"outcome Vector outcome values","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/Outcome.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Outcome / partial residual used to sample an additive model. 
— Outcome","text":"new Outcome object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":null,"dir":"Reference","previous_headings":"","what":"Class that wraps the ","title":"Class that wraps the ","text":"Coordinates various C++ random effects classes persists needed prediction / serialization","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Class that wraps the ","text":"rfx_container_ptr External pointer C++ StochTree::RandomEffectsContainer class label_mapper_ptr External pointer C++ StochTree::LabelMapper class training_group_ids Unique vector group IDs training dataset","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Class that wraps the ","text":"RandomEffectSamples$new() RandomEffectSamples$predict() RandomEffectSamples$extract_parameter_samples() RandomEffectSamples$extract_label_mapping()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Class that wraps the ","text":"Create new RandomEffectSamples object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that wraps the ","text":"","code":"RandomEffectSamples$new(num_components, num_groups, random_effects_tracker)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that wraps the ","text":"num_components Number \"components\" bases defining 
random effects regression num_groups Number random effects groups random_effects_tracker Object type RandomEffectsTracker","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that wraps the ","text":"new RandomEffectSamples object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"method-predict-","dir":"Reference","previous_headings":"","what":"Method predict()","title":"Class that wraps the ","text":"Predict random effects observation implied rfx_group_ids rfx_basis. random effects model \"intercept-\" rfx_basis vector ones size length(rfx_group_ids).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"usage-1","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that wraps the ","text":"","code":"RandomEffectSamples$predict(rfx_group_ids, rfx_basis)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"arguments-1","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that wraps the ","text":"rfx_group_ids Indices random effects groups prediction set rfx_basis Basis used random effects prediction","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"returns-1","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that wraps the ","text":"Matrix many rows observations provided many columns samples drawn model.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"method-extract-parameter-samples-","dir":"Reference","previous_headings":"","what":"Method extract_parameter_samples()","title":"Class that wraps the ","text":"Extract random effects parameters sampled. 
\"redundant parameterization\" Gelman et al (2008), includes four parameters: alpha (\"working parameter\" shared across every group), xi (\"group parameter\" sampled separately group), beta (product alpha xi, corresponds overall group-level random effects), sigma (group-independent prior variance component xi).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"usage-2","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that wraps the ","text":"","code":"RandomEffectSamples$extract_parameter_samples()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"returns-2","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that wraps the ","text":"List arrays. alpha array dimension (num_components, num_samples) simply vector num_components = 1. xi beta arrays dimension (num_components, num_groups, num_samples) simply matrix num_components = 1. sigma array dimension (num_components, num_samples) simply vector num_components = 1.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"method-extract-label-mapping-","dir":"Reference","previous_headings":"","what":"Method extract_label_mapping()","title":"Class that wraps the ","text":"Convert mapping group IDs random effect components indices C++ R native format","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"usage-3","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that wraps the ","text":"","code":"RandomEffectSamples$extract_label_mapping()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"returns-3","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that wraps the ","text":"List mapping group ID random effect 
components.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":null,"dir":"Reference","previous_headings":"","what":"Dataset used to sample a random effects model — RandomEffectsDataset","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"dataset consists three matrices / vectors: group labels, bases, variance weights. Variance weights optional.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"data_ptr External pointer C++ RandomEffectsDataset class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"RandomEffectsDataset$new() RandomEffectsDataset$num_observations() RandomEffectsDataset$has_group_labels() RandomEffectsDataset$has_basis() RandomEffectsDataset$has_variance_weights()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"Create new RandomEffectsDataset object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"","code":"RandomEffectsDataset$new(group_labels, basis, variance_weights = 
NULL)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"group_labels Vector group labels basis Matrix bases used define random effects regression (intercept-model, pass array ones) variance_weights (Optional) Vector observation-specific variance weights","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"new RandomEffectsDataset object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"method-num-observations-","dir":"Reference","previous_headings":"","what":"Method num_observations()","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"Return number observations RandomEffectsDataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"usage-1","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"","code":"RandomEffectsDataset$num_observations()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"returns-1","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"Observation count","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"method-has-group-labels-","dir":"Reference","previous_headings":"","what":"Method has_group_labels()","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"Whether 
dataset group label indices","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"usage-2","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"","code":"RandomEffectsDataset$has_group_labels()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"returns-2","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"True group label vector loaded, false otherwise","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"method-has-basis-","dir":"Reference","previous_headings":"","what":"Method has_basis()","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"Whether dataset basis matrix","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"usage-3","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"","code":"RandomEffectsDataset$has_basis()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"returns-3","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"True basis matrix loaded, false otherwise","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"method-has-variance-weights-","dir":"Reference","previous_headings":"","what":"Method has_variance_weights()","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"Whether dataset variance 
weights","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"usage-4","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"","code":"RandomEffectsDataset$has_variance_weights()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"returns-4","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"True variance weights loaded, false otherwise","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":null,"dir":"Reference","previous_headings":"","what":"The core ","title":"The core ","text":"Stores current model state, prior parameters, procedures sampling conditional posterior parameter.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"The core ","text":"rfx_model_ptr External pointer C++ StochTree::RandomEffectsModel class num_groups Number groups random effects model num_components Number components (.e. 
dimension basis) random effects model","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"The core ","text":"RandomEffectsModel$new() RandomEffectsModel$sample_random_effect() RandomEffectsModel$set_working_parameter() RandomEffectsModel$set_group_parameters() RandomEffectsModel$set_working_parameter_cov() RandomEffectsModel$set_group_parameter_cov() RandomEffectsModel$set_variance_prior_shape() RandomEffectsModel$set_variance_prior_scale()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"The core ","text":"Create new RandomEffectsModel object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$new(num_components, num_groups)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"num_components Number \"components\" bases defining random effects regression num_groups Number random effects groups","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"The core ","text":"new RandomEffectsModel object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-sample-random-effect-","dir":"Reference","previous_headings":"","what":"Method sample_random_effect()","title":"The core ","text":"Sample random effects 
model.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage-1","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$sample_random_effect( rfx_dataset, residual, rfx_tracker, rfx_samples, global_variance, rng )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments-1","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"rfx_dataset Object type RandomEffectsDataset residual Object type Outcome rfx_tracker Object type RandomEffectsTracker rfx_samples Object type RandomEffectSamples global_variance Scalar global variance parameter rng Object type CppRNG","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns-1","dir":"Reference","previous_headings":"","what":"Returns","title":"The core ","text":"None","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-set-working-parameter-","dir":"Reference","previous_headings":"","what":"Method set_working_parameter()","title":"The core ","text":"Set value \"working parameter.\" typically used initialization, also used interrupt override sampler.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage-2","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$set_working_parameter(value)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments-2","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"value Parameter input","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns-2","dir":"Reference","previous_headings":"","what":"Returns","title":"The core 
","text":"None","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-set-group-parameters-","dir":"Reference","previous_headings":"","what":"Method set_group_parameters()","title":"The core ","text":"Set value \"group parameters.\" typically used initialization, also used interrupt override sampler.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage-3","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$set_group_parameters(value)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments-3","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"value Parameter input","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns-3","dir":"Reference","previous_headings":"","what":"Returns","title":"The core ","text":"None","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-set-working-parameter-cov-","dir":"Reference","previous_headings":"","what":"Method set_working_parameter_cov()","title":"The core ","text":"Set value working parameter covariance. 
typically used initialization, also used interrupt override sampler.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage-4","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$set_working_parameter_cov(value)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments-4","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"value Parameter input","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns-4","dir":"Reference","previous_headings":"","what":"Returns","title":"The core ","text":"None","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-set-group-parameter-cov-","dir":"Reference","previous_headings":"","what":"Method set_group_parameter_cov()","title":"The core ","text":"Set value group parameter covariance. 
typically used initialization, also used interrupt override sampler.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage-5","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$set_group_parameter_cov(value)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments-5","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"value Parameter input","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns-5","dir":"Reference","previous_headings":"","what":"Returns","title":"The core ","text":"None","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-set-variance-prior-shape-","dir":"Reference","previous_headings":"","what":"Method set_variance_prior_shape()","title":"The core ","text":"Set shape parameter group parameter variance prior.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage-6","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$set_variance_prior_shape(value)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments-6","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"value Parameter input","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns-6","dir":"Reference","previous_headings":"","what":"Returns","title":"The core ","text":"None","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-set-variance-prior-scale-","dir":"Reference","previous_headings":"","what":"Method set_variance_prior_scale()","title":"The core 
","text":"Set shape parameter group parameter variance prior.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage-7","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$set_variance_prior_scale(value)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments-7","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"value Parameter input","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns-7","dir":"Reference","previous_headings":"","what":"Returns","title":"The core ","text":"None","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsTracker.html","id":null,"dir":"Reference","previous_headings":"","what":"Class that defines a ","title":"Class that defines a ","text":"Stores mapping every observation group index, mapping group indices training sample observations available group, predictions observation.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsTracker.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Class that defines a ","text":"rfx_tracker_ptr External pointer C++ StochTree::RandomEffectsTracker class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsTracker.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Class that defines a ","text":"RandomEffectsTracker$new()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsTracker.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Class that defines a ","text":"Create new RandomEffectsTracker 
object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsTracker.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that defines a ","text":"","code":"RandomEffectsTracker$new(rfx_group_indices)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsTracker.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that defines a ","text":"rfx_group_indices Integer indices indicating groups used define random effects","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsTracker.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that defines a ","text":"new RandomEffectsTracker object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/bcf.html","id":null,"dir":"Reference","previous_headings":"","what":"Run the Bayesian Causal Forest (BCF) algorithm for regularized causal effect estimation. — bcf","title":"Run the Bayesian Causal Forest (BCF) algorithm for regularized causal effect estimation. — bcf","text":"Run Bayesian Causal Forest (BCF) algorithm regularized causal effect estimation.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/bcf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Run the Bayesian Causal Forest (BCF) algorithm for regularized causal effect estimation. 
— bcf","text":"","code":"bcf( X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NULL, rfx_basis_train = NULL, X_test = NULL, Z_test = NULL, pi_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL, feature_types = rep(0, ncol(X_train)), cutpoint_grid_size = 100, sigma_leaf_mu = NULL, sigma_leaf_tau = NULL, alpha_mu = 0.95, alpha_tau = 0.25, beta_mu = 2, beta_tau = 3, min_samples_leaf_mu = 5, min_samples_leaf_tau = 5, nu = 3, lambda = NULL, a_leaf_mu = 3, a_leaf_tau = 3, b_leaf_mu = NULL, b_leaf_tau = NULL, q = 0.9, sigma2 = NULL, num_trees_mu = 250, num_trees_tau = 50, num_gfr = 5, num_burnin = 0, num_mcmc = 100, sample_sigma_global = T, sample_sigma_leaf_mu = T, sample_sigma_leaf_tau = T, propensity_covariate = \"mu\", adaptive_coding = T, b_0 = -0.5, b_1 = 0.5, random_seed = -1 )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/bcf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Run the Bayesian Causal Forest (BCF) algorithm for regularized causal effect estimation. — bcf","text":"X_train Covariates used split trees ensemble. Z_train Vector (continuous binary) treatment assignments. y_train Outcome modeled ensemble. pi_train (Optional) Vector propensity scores. provided, estimated data. group_ids_train (Optional) Group labels used additive random effects model. rfx_basis_train (Optional) Basis \"random-slope\" regression additive random effects model. group_ids_train provided regression basis, intercept-random effects model estimated. X_test (Optional) Test set covariates used define \"sample\" evaluation data. Z_test (Optional) Test set (continuous binary) treatment assignments. pi_test (Optional) Vector propensity scores. provided, estimated data. group_ids_test (Optional) Test set group labels used additive random effects model. currently support (plan near future), test set evaluation group labels training set. 
rfx_basis_test (Optional) Test set basis \"random-slope\" regression additive random effects model. feature_types Vector length ncol(X_train) indicating \"type\" covariates (0 = numeric, 1 = ordered categorical, 2 = unordered categorical). Default: rep(0,ncol(X_train)). cutpoint_grid_size Maximum size \"grid\" potential cutpoints consider. Default: 100. sigma_leaf_mu Starting value leaf node scale parameter prognostic forest. Calibrated internally 2/num_trees_mu set . sigma_leaf_tau Starting value leaf node scale parameter treatment effect forest. Calibrated internally 1/num_trees_tau set . alpha_mu Prior probability splitting tree depth 0 prognostic forest. Tree split prior combines alpha beta via alpha*(1+node_depth)^-beta. Default: 0.95. alpha_tau Prior probability splitting tree depth 0 treatment effect forest. Tree split prior combines alpha beta via alpha*(1+node_depth)^-beta. Default: 0.25. beta_mu Exponent decreases split probabilities nodes depth > 0 prognostic forest. Tree split prior combines alpha beta via alpha*(1+node_depth)^-beta. Default: 2.0. beta_tau Exponent decreases split probabilities nodes depth > 0 treatment effect forest. Tree split prior combines alpha beta via alpha*(1+node_depth)^-beta. Default: 3.0. min_samples_leaf_mu Minimum allowable size leaf, terms training samples, prognostic forest. Default: 5. min_samples_leaf_tau Minimum allowable size leaf, terms training samples, treatment effect forest. Default: 5. nu Shape parameter IG(nu, nu*lambda) global error variance model. Default: 3. lambda Component scale parameter IG(nu, nu*lambda) global error variance prior. specified, calibrated Sparapani et al (2021). a_leaf_mu Shape parameter IG(a_leaf, b_leaf) leaf node parameter variance model prognostic forest. Default: 3. a_leaf_tau Shape parameter IG(a_leaf, b_leaf) leaf node parameter variance model treatment effect forest. Default: 3. b_leaf_mu Scale parameter IG(a_leaf, b_leaf) leaf node parameter variance model prognostic forest. 
Calibrated internally 0.5/num_trees set . b_leaf_tau Scale parameter IG(a_leaf, b_leaf) leaf node parameter variance model treatment effect forest. Calibrated internally 0.5/num_trees set . q Quantile used calibrated lambda Sparapani et al (2021). Default: 0.9. sigma2 Starting value global variance parameter. Calibrated internally Sparapani et al (2021) set . num_trees_mu Number trees prognostic forest. Default: 200. num_trees_tau Number trees treatment effect forest. Default: 50. num_gfr Number \"warm-start\" iterations run using grow--root algorithm (Hahn, 2021). Default: 5. num_burnin Number \"burn-\" iterations MCMC sampler. Default: 0. num_mcmc Number \"retained\" iterations MCMC sampler. Default: 100. sample_sigma_global Whether update sigma^2 global error variance parameter based IG(nu, nu*lambda). Default: T. sample_sigma_leaf_mu Whether update sigma_leaf_mu leaf scale variance parameter prognostic forest based IG(a_leaf_mu, b_leaf_mu). Default: T. sample_sigma_leaf_tau Whether update sigma_leaf_tau leaf scale variance parameter treatment effect forest based IG(a_leaf_tau, b_leaf_tau). Default: T. propensity_covariate Whether include propensity score covariate either forests. Enter \"none\" neither, \"mu\" prognostic forest, \"tau\" treatment forest, \"\" forests. \"none\" propensity score provided, estimated (X_train, Z_train) using xgboost. Default: \"mu\". adaptive_coding Whether use \"adaptive coding\" scheme binary treatment variable coded manually (0,1) (-1,1) learned via parameters b_0 b_1 attach outcome model [b_0 (1-Z) + b_1 Z] tau(X). ignored Z binary. Default: T. b_0 Initial value \"control\" group coding parameter. ignored Z binary. Default: -0.5. b_1 Initial value \"treatment\" group coding parameter. ignored Z binary. Default: 0.5. random_seed Integer parameterizing C++ random number generator. 
specified, C++ random number generator seeded according std::random_device.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/bcf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Run the Bayesian Causal Forest (BCF) algorithm for regularized causal effect estimation. — bcf","text":"List sampling outputs wrapper around sampled forests (can used -memory prediction new data, serialized JSON disk).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/bcf.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Run the Bayesian Causal Forest (BCF) algorithm for regularized causal effect estimation. — bcf","text":"","code":"n <- 500 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) g <- function(x) {ifelse(x[,5]==1,2,ifelse(x[,5]==2,-1,4))} mu1 <- function(x) {1+g(x)+x[,1]*x[,3]} mu2 <- function(x) {1+g(x)+6*abs(x[,3]-1)} tau1 <- function(x) {rep(3,nrow(x))} tau2 <- function(x) {1+2*x[,2]*x[,4]} mu_x <- mu1(X) tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 4 y <- E_XZ + rnorm(n, 0, 1)*(sd(E_XZ)/snr) test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test) # 
plot(rowMeans(bcf_model$mu_hat_test), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") # abline(0,1,col=\"red\",lty=3,lwd=3) # plot(rowMeans(bcf_model$tau_hat_test), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") # abline(0,1,col=\"red\",lty=3,lwd=3)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestContainer.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a container of forest samples — createForestContainer","title":"Create a container of forest samples — createForestContainer","text":"Create container forest samples","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestContainer.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a container of forest samples — createForestContainer","text":"","code":"createForestContainer(num_trees, output_dimension = 1, is_leaf_constant = F)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestContainer.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a container of forest samples — createForestContainer","text":"num_trees Number trees output_dimension Dimensionality outcome model is_leaf_constant Whether leaf constant","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestContainer.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a container of forest samples — createForestContainer","text":"ForestSamples object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestDataset.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a forest dataset object — createForestDataset","title":"Create a forest dataset object — createForestDataset","text":"Create forest dataset 
object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestDataset.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a forest dataset object — createForestDataset","text":"","code":"createForestDataset(covariates, basis = NULL, variance_weights = NULL)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestDataset.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a forest dataset object — createForestDataset","text":"covariates Matrix covariates basis (Optional) Matrix bases used define leaf regression variance_weights (Optional) Vector observation-specific variance weights","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestDataset.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a forest dataset object — createForestDataset","text":"ForestDataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestModel.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a forest model object — createForestModel","title":"Create a forest model object — createForestModel","text":"Create forest model object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestModel.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a forest model object — createForestModel","text":"","code":"createForestModel( forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestModel.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a forest model object — createForestModel","text":"forest_dataset ForestDataset object, used initialize forest sampling data structures feature_types Feature types 
(integers 0 = numeric, 1 = ordered categorical, 2 = unordered categorical) num_trees Number trees forest sampled n Number observations forest_dataset alpha Root node split probability tree prior beta Depth prior penalty tree prior min_samples_leaf Minimum number samples tree leaf","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestModel.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a forest model object — createForestModel","text":"ForestModel object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createOutcome.html","id":null,"dir":"Reference","previous_headings":"","what":"Create an outcome object — createOutcome","title":"Create an outcome object — createOutcome","text":"Create outcome object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createOutcome.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create an outcome object — createOutcome","text":"","code":"createOutcome(outcome)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createOutcome.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create an outcome object — createOutcome","text":"outcome Vector outcome values","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createOutcome.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create an outcome object — createOutcome","text":"Outcome object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRNG.html","id":null,"dir":"Reference","previous_headings":"","what":"Create an R class that wraps a C++ random number generator — createRNG","title":"Create an R class that wraps a C++ random number generator — createRNG","text":"Create R class wraps C++ random number 
generator","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRNG.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create an R class that wraps a C++ random number generator — createRNG","text":"","code":"createRNG(random_seed = -1)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRNG.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create an R class that wraps a C++ random number generator — createRNG","text":"random_seed (Optional) random seed sampling","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRNG.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create an R class that wraps a C++ random number generator — createRNG","text":"CppRng object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectSamples.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a RandomEffectSamples object — createRandomEffectSamples","title":"Create a RandomEffectSamples object — createRandomEffectSamples","text":"Create RandomEffectSamples object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectSamples.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a RandomEffectSamples object — createRandomEffectSamples","text":"","code":"createRandomEffectSamples(num_components, num_groups, random_effects_tracker)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectSamples.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a RandomEffectSamples object — createRandomEffectSamples","text":"num_components Number \"components\" bases defining random effects regression num_groups Number random effects groups random_effects_tracker Object type 
RandomEffectsTracker","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectSamples.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a RandomEffectSamples object — createRandomEffectSamples","text":"RandomEffectSamples object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsDataset.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a random effects dataset object — createRandomEffectsDataset","title":"Create a random effects dataset object — createRandomEffectsDataset","text":"Create random effects dataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsDataset.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a random effects dataset object — createRandomEffectsDataset","text":"","code":"createRandomEffectsDataset(group_labels, basis, variance_weights = NULL)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsDataset.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a random effects dataset object — createRandomEffectsDataset","text":"group_labels Vector group labels basis Matrix bases used define random effects regression (intercept-model, pass array ones) variance_weights (Optional) Vector observation-specific variance weights","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsDataset.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a random effects dataset object — createRandomEffectsDataset","text":"RandomEffectsDataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsModel.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a RandomEffectsModel object — 
createRandomEffectsModel","title":"Create a RandomEffectsModel object — createRandomEffectsModel","text":"Create RandomEffectsModel object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsModel.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a RandomEffectsModel object — createRandomEffectsModel","text":"","code":"createRandomEffectsModel(num_components, num_groups)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsModel.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a RandomEffectsModel object — createRandomEffectsModel","text":"num_components Number \"components\" bases defining random effects regression num_groups Number random effects groups","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsModel.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a RandomEffectsModel object — createRandomEffectsModel","text":"RandomEffectsModel object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsTracker.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a RandomEffectsTracker object — createRandomEffectsTracker","title":"Create a RandomEffectsTracker object — createRandomEffectsTracker","text":"Create RandomEffectsTracker object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsTracker.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a RandomEffectsTracker object — createRandomEffectsTracker","text":"","code":"createRandomEffectsTracker(rfx_group_indices)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsTracker.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a 
RandomEffectsTracker object — createRandomEffectsTracker","text":"rfx_group_indices Integer indices indicating groups used define random effects","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsTracker.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a RandomEffectsTracker object — createRandomEffectsTracker","text":"RandomEffectsTracker object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bartmodel.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bartmodel","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bartmodel","text":"Extract raw sample values random effect parameter terms.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bartmodel.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bartmodel","text":"","code":"# S3 method for bartmodel getRandomEffectSamples(object, ...)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bartmodel.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bartmodel","text":"object Object type bcf containing draws Bayesian causal forest model associated sampling outputs.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bartmodel.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract raw sample values for each of the random effect parameter terms. 
— getRandomEffectSamples.bartmodel","text":"List arrays. alpha array dimension (num_components, num_samples) simply vector num_components = 1. xi beta arrays dimension (num_components, num_groups, num_samples) simply matrix num_components = 1. sigma array dimension (num_components, num_samples) simply vector num_components = 1.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bartmodel.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bartmodel","text":"","code":"n <- 100 p <- 5 X <- matrix(runif(n*p), ncol = p) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) ) snr <- 3 group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1),nrow=2,byrow=T) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) E_y <- f_XW + rfx_term y <- E_y + rnorm(n, 0, 1)*(sd(E_y)/snr) test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] group_ids_test <- group_ids[test_inds] group_ids_train <- group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] rfx_term_train <- rfx_term[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train, group_ids_train = group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, group_ids_test = group_ids_test, rfx_basis_test = rfx_basis_test, num_gfr = 100, num_burnin = 0, num_mcmc = 100, sample_tau = T) rfx_samples <- 
getRandomEffectSamples(bart_model)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bcf.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bcf","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bcf","text":"Extract raw sample values random effect parameter terms.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bcf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bcf","text":"","code":"# S3 method for bcf getRandomEffectSamples(object, ...)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bcf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bcf","text":"object Object type bcf containing draws Bayesian causal forest model associated sampling outputs.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bcf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bcf","text":"List arrays. alpha array dimension (num_components, num_samples) simply vector num_components = 1. xi beta arrays dimension (num_components, num_groups, num_samples) simply matrix num_components = 1. 
sigma array dimension (num_components, num_samples) simply vector num_components = 1.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bcf.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bcf","text":"","code":"n <- 500 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) g <- function(x) {ifelse(x[,5]==1,2,ifelse(x[,5]==2,-1,4))} mu1 <- function(x) {1+g(x)+x[,1]*x[,3]} mu2 <- function(x) {1+g(x)+6*abs(x[,3]-1)} tau1 <- function(x) {rep(3,nrow(x))} tau2 <- function(x) {1+2*x[,2]*x[,4]} mu_x <- mu1(X) tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 3 group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1),nrow=2,byrow=T) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] group_ids_test <- group_ids[test_inds] group_ids_train <- group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] rfx_term_train <- rfx_term[train_inds] bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = 
pi_train, group_ids_train = group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, rfx_basis_test = rfx_basis_test, feature_types = c(0,0,0,1,1), num_gfr = 100, num_burnin = 0, num_mcmc = 100, sample_sigma_leaf_mu = T, sample_sigma_leaf_tau = F) rfx_samples <- getRandomEffectSamples(bcf_model)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.html","id":null,"dir":"Reference","previous_headings":"","what":"Generic function for extracting random effect samples from a model object (BCF, BART, etc...) — getRandomEffectSamples","title":"Generic function for extracting random effect samples from a model object (BCF, BART, etc...) — getRandomEffectSamples","text":"Generic function extracting random effect samples model object (BCF, BART, etc...)","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Generic function for extracting random effect samples from a model object (BCF, BART, etc...) — getRandomEffectSamples","text":"","code":"getRandomEffectSamples(object, ...)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Generic function for extracting random effect samples from a model object (BCF, BART, etc...) 
— getRandomEffectSamples","text":"List random effect samples","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bartmodel.html","id":null,"dir":"Reference","previous_headings":"","what":"Predict from a sampled BART model on new data — predict.bartmodel","title":"Predict from a sampled BART model on new data — predict.bartmodel","text":"Predict sampled BART model new data","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bartmodel.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Predict from a sampled BART model on new data — predict.bartmodel","text":"","code":"# S3 method for bartmodel predict( bart, X_test, W_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bartmodel.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Predict from a sampled BART model on new data — predict.bartmodel","text":"bart Object type bart containing draws regression forest associated sampling outputs. X_test Covariates used determine tree leaf predictions observation. W_test (Optional) Bases used prediction (e.g. dot product leaf values). Default: NULL. group_ids_test (Optional) Test set group labels used additive random effects model. currently support (plan near future), test set evaluation group labels training set. rfx_basis_test (Optional) Test set basis \"random-slope\" regression additive random effects model.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bartmodel.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Predict from a sampled BART model on new data — predict.bartmodel","text":"List prediction matrices. model random effects, list one element -- predictions forest. 
model random effects, list three elements -- forest predictions, random effects predictions, sum (y_hat).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bartmodel.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Predict from a sampled BART model on new data — predict.bartmodel","text":"","code":"n <- 100 p <- 5 X <- matrix(runif(n*p), ncol = p) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) ) noise_sd <- 1 y <- f_XW + rnorm(n, 0, noise_sd) test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train, leaf_model = 0) yhat_test <- predict(bart_model, X_test) #> Error in (bart$model_params$has_rfx_basis) && (is.null(rfx_basis_test)): invalid 'x' type in 'x && y' # plot(rowMeans(yhat_test), y_test, xlab = \"predicted\", ylab = \"actual\") # abline(0,1,col=\"red\",lty=3,lwd=3)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bcf.html","id":null,"dir":"Reference","previous_headings":"","what":"Predict from a sampled BCF model on new data — predict.bcf","title":"Predict from a sampled BCF model on new data — predict.bcf","text":"Predict sampled BCF model new data","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bcf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Predict from a sampled BCF model on new data — predict.bcf","text":"","code":"# S3 method for bcf predict( bcf, X_test, Z_test, pi_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL 
)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bcf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Predict from a sampled BCF model on new data — predict.bcf","text":"bcf Object type bcf containing draws Bayesian causal forest model associated sampling outputs. X_test Covariates used determine tree leaf predictions observation. Z_test Treatments used prediction. pi_test (Optional) Propensities used prediction. group_ids_test (Optional) Test set group labels used additive random effects model. currently support (plan near future), test set evaluation group labels training set. rfx_basis_test (Optional) Test set basis \"random-slope\" regression additive random effects model.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bcf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Predict from a sampled BCF model on new data — predict.bcf","text":"List three (four) nrow(X_test) bcf$num_samples matrices: prognostic function estimates, treatment effect estimates, (possibly) random effects predictions, outcome predictions.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bcf.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Predict from a sampled BCF model on new data — predict.bcf","text":"","code":"n <- 500 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) g <- function(x) {ifelse(x[,5]==1,2,ifelse(x[,5]==2,-1,4))} mu1 <- function(x) {1+g(x)+x[,1]*x[,3]} mu2 <- function(x) {1+g(x)+6*abs(x[,3]-1)} tau1 <- function(x) {rep(3,nrow(x))} tau2 <- function(x) {1+2*x[,2]*x[,4]} mu_x <- mu1(X) tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 4 y <- E_XZ + rnorm(n, 0, 
1)*(sd(E_XZ)/snr) test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train) preds <- predict(bcf_model, X_test, Z_test, pi_test) #> Error in (bcf$model_params$has_rfx_basis) && (is.null(rfx_basis_test)): invalid 'x' type in 'x && y' # plot(rowMeans(preds$mu_hat), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") # abline(0,1,col=\"red\",lty=3,lwd=3) # plot(rowMeans(preds$tau_hat), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") # abline(0,1,col=\"red\",lty=3,lwd=3)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/sample_sigma2_one_iteration.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample one iteration of the global variance model — sample_sigma2_one_iteration","title":"Sample one iteration of the global variance model — sample_sigma2_one_iteration","text":"Sample one iteration global variance model","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/sample_sigma2_one_iteration.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample one iteration of the global variance model — sample_sigma2_one_iteration","text":"","code":"sample_sigma2_one_iteration(residual, rng, nu, lambda)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/sample_sigma2_one_iteration.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample one iteration of 
the global variance model — sample_sigma2_one_iteration","text":"residual Outcome class rng C++ random number generator nu Global variance shape parameter lambda Constitutes scale parameter global variance along nu (.e. scale nu*lambda)","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/sample_tau_one_iteration.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!) — sample_tau_one_iteration","title":"Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!) — sample_tau_one_iteration","text":"Sample one iteration leaf parameter variance model (univariate basis constant leaf!)","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/sample_tau_one_iteration.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!) — sample_tau_one_iteration","text":"","code":"sample_tau_one_iteration(forest_samples, rng, a, b, sample_num)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/sample_tau_one_iteration.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!) 
— sample_tau_one_iteration","text":"forest_samples Container forest samples rng C++ random number generator Leaf variance shape parameter b Leaf variance scale parameter sample_num Sample index","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/stochtree-package.html","id":null,"dir":"Reference","previous_headings":"","what":"stochtree: Stochastic tree ensembles (XBART and BART) for supervised learning and causal inference — stochtree-package","title":"stochtree: Stochastic tree ensembles (XBART and BART) for supervised learning and causal inference — stochtree-package","text":"Stochastic tree ensembles (XBART BART) supervised learning causal inference","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/stochtree-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"stochtree: Stochastic tree ensembles (XBART and BART) for supervised learning and causal inference — stochtree-package","text":"Maintainer: Drew Herren drewherrenopensource@gmail.com (ORCID) Authors: Richard Hahn Jared Murray Carlos Carvalho Jingyu ","code":""}] +[{"path":"https://stochastictree.github.io/stochtree-r/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2024 stochtree authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. 
EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"simulation","dir":"Articles","previous_headings":"Demo 1: Step Function","what":"Simulation","title":"Bayesian Supervised Learning in StochTree","text":", generate data simple step function.","code":"# Generate the data n <- 500 p_x <- 10 snr <- 3 X <- matrix(runif(n*p_x), ncol = p_x) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) ) noise_sd <- sd(f_XW) / snr y <- f_XW + rnorm(n, 0, 1)*noise_sd # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] W_test <- NULL W_train <- NULL y_test <- y[test_inds] y_train <- y[train_inds]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"warmstart","dir":"Articles","previous_headings":"Demo 1: Step Function > Sampling and Analysis","what":"Warmstart","title":"Bayesian Supervised Learning in StochTree","text":"first simulate ensemble model \\(y \\mid X\\) using “warm-start” initialization samples (Hahn (2023)). default stochtree. 
Inspect initial XBART “warm-start” samples Inspect BART samples initialized XBART warm-start","code":"num_gfr <- 10 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bart_model_warmstart <- stochtree::bart( X_train = X_train, y_train = y_train, X_test = X_test, leaf_model = 0, num_trees = 100, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma = T, sample_tau = T ) plot(bart_model_warmstart$sigma2_samples[1:num_gfr], ylab=\"sigma^2\") plot(rowMeans(bart_model_warmstart$yhat_test[,1:num_gfr]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(bart_model_warmstart$sigma2_samples[(num_gfr + 1):num_samples], ylab=\"sigma^2\") plot(rowMeans(bart_model_warmstart$yhat_test[,(num_gfr + 1):num_samples]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"bart-mcmc-without-warmstart","dir":"Articles","previous_headings":"Demo 1: Step Function > Sampling and Analysis","what":"BART MCMC without Warmstart","title":"Bayesian Supervised Learning in StochTree","text":"Next, simulate ensemble model without warm-start initialization. 
Inspect BART samples burnin.","code":"num_gfr <- 0 num_burnin <- 100 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bart_model_root <- stochtree::bart( X_train = X_train, y_train = y_train, X_test = X_test, leaf_model = 0, num_trees = 100, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma = T, sample_tau = T ) plot(bart_model_root$sigma2_samples[(num_burnin + 1):num_samples], ylab=\"sigma^2\") plot(rowMeans(bart_model_root$yhat_test[,(num_burnin + 1):num_samples]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"simulation-1","dir":"Articles","previous_headings":"Demo 2: Partitioned Linear Model","what":"Simulation","title":"Bayesian Supervised Learning in StochTree","text":", generate data simple partitioned linear model.","code":"# Generate the data n <- 500 p_x <- 10 p_w <- 1 snr <- 3 X <- matrix(runif(n*p_x), ncol = p_x) W <- matrix(runif(n*p_w), ncol = p_w) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*W[,1]) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*W[,1]) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*W[,1]) + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*W[,1]) ) noise_sd <- sd(f_XW) / snr y <- f_XW + rnorm(n, 0, 1)*noise_sd # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] W_test <- W[test_inds,] W_train <- W[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"warmstart-1","dir":"Articles","previous_headings":"Demo 2: Partitioned Linear Model > Sampling and Analysis","what":"Warmstart","title":"Bayesian Supervised 
Learning in StochTree","text":"first simulate ensemble model \\(y \\mid X\\) using “warm-start” initialization samples (Hahn (2023)). default stochtree. Inspect initial XBART “warm-start” samples Inspect BART samples initialized XBART warm-start","code":"num_gfr <- 10 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bart_model_warmstart <- stochtree::bart( X_train = X_train, W_train = W_train, y_train = y_train, X_test = X_test, W_test = W_test, leaf_model = 1, num_trees = 100, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma = T, sample_tau = T ) plot(bart_model_warmstart$sigma2_samples[1:num_gfr], ylab=\"sigma^2\") plot(rowMeans(bart_model_warmstart$yhat_test[,1:num_gfr]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(bart_model_warmstart$sigma2_samples[(num_gfr + 1):num_samples], ylab=\"sigma^2\") plot(rowMeans(bart_model_warmstart$yhat_test[,(num_gfr + 1):num_samples]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"bart-mcmc-without-warmstart-1","dir":"Articles","previous_headings":"Demo 2: Partitioned Linear Model > Sampling and Analysis","what":"BART MCMC without Warmstart","title":"Bayesian Supervised Learning in StochTree","text":"Next, simulate ensemble model without warm-start initialization. 
Inspect BART samples burnin.","code":"num_gfr <- 0 num_burnin <- 100 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bart_model_root <- stochtree::bart( X_train = X_train, W_train = W_train, y_train = y_train, X_test = X_test, W_test = W_test, leaf_model = 1, num_trees = 100, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma = T, sample_tau = T ) plot(bart_model_root$sigma2_samples[(num_burnin + 1):num_samples], ylab=\"sigma^2\") plot(rowMeans(bart_model_root$yhat_test[,(num_burnin + 1):num_samples]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"simulation-2","dir":"Articles","previous_headings":"Demo 3: Partitioned Linear Model with Random Effects","what":"Simulation","title":"Bayesian Supervised Learning in StochTree","text":", generate data simple partitioned linear model additive random effect structure.","code":"# Generate the data n <- 500 p_x <- 10 p_w <- 1 snr <- 3 X <- matrix(runif(n*p_x), ncol = p_x) W <- matrix(runif(n*p_w), ncol = p_w) group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-5, -3, 5, 3),nrow=2,byrow=T) rfx_basis <- cbind(1, runif(n, -1, 1)) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*W[,1]) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*W[,1]) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*W[,1]) + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*W[,1]) ) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) noise_sd <- sd(f_XW) / snr y <- f_XW + rfx_term + rnorm(n, 0, 1)*noise_sd # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] W_test <- W[test_inds,] W_train <- W[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] 
group_ids_test <- group_ids[test_inds] group_ids_train <- group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"warmstart-2","dir":"Articles","previous_headings":"Demo 3: Partitioned Linear Model with Random Effects > Sampling and Analysis","what":"Warmstart","title":"Bayesian Supervised Learning in StochTree","text":"first simulate ensemble model \\(y \\mid X\\) using “warm-start” initialization samples (Hahn (2023)). default stochtree. Inspect initial XBART “warm-start” samples Inspect BART samples initialized XBART warm-start","code":"num_gfr <- 10 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bart_model_warmstart <- stochtree::bart( X_train = X_train, W_train = W_train, y_train = y_train, group_ids_train = group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, W_test = W_test, group_ids_test = group_ids_test, rfx_basis_test = rfx_basis_test, leaf_model = 1, num_trees = 100, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma = T, sample_tau = T ) plot(bart_model_warmstart$sigma2_samples[1:num_gfr], ylab=\"sigma^2\") abline(h=noise_sd^2,col=\"red\",lty=2,lwd=2.5) plot(rowMeans(bart_model_warmstart$yhat_test[,1:num_gfr]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(bart_model_warmstart$sigma2_samples[(num_gfr + 1):num_samples], ylab=\"sigma^2\") abline(h=noise_sd^2,col=\"red\",lty=2,lwd=2.5) plot(rowMeans(bart_model_warmstart$yhat_test[,(num_gfr + 1):num_samples]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Bayesian-Supervised-Learning.html","id":"bart-mcmc-without-warmstart-2","dir":"Articles","previous_headings":"Demo 3: Partitioned Linear Model 
with Random Effects > Sampling and Analysis","what":"BART MCMC without Warmstart","title":"Bayesian Supervised Learning in StochTree","text":"Next, simulate ensemble model without warm-start initialization. Inspect BART samples burnin.","code":"num_gfr <- 0 num_burnin <- 100 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bart_model_root <- stochtree::bart( X_train = X_train, W_train = W_train, y_train = y_train, group_ids_train = group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, W_test = W_test, group_ids_test = group_ids_test, rfx_basis_test = rfx_basis_test, leaf_model = 1, num_trees = 100, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma = T, sample_tau = T ) plot(bart_model_root$sigma2_samples[(num_burnin + 1):num_samples], ylab=\"sigma^2\") abline(h=noise_sd^2,col=\"red\",lty=2,lwd=2.5) plot(rowMeans(bart_model_root$yhat_test[,(num_burnin + 1):num_samples]), y_test, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"demo-1-nonlinear-outcome-model-heterogeneous-treatment-effect","dir":"Articles","previous_headings":"","what":"Demo 1: Nonlinear Outcome Model, Heterogeneous Treatment Effect","title":"Causal Machine Learning in StochTree","text":"consider following data generating process Hahn, Murray, Carvalho (2020): \\[\\begin{equation*} \\begin{aligned} y &= \\mu(X) + \\tau(X) Z + \\epsilon\\\\ \\epsilon &\\sim N\\left(0,\\sigma^2\\right)\\\\ \\mu(X) &= 1 + g(X) + 6 \\lvert X_3 - 1 \\rvert\\\\ \\tau(X) &= 1 + 2 X_2 X_4\\\\ g(X) &= \\mathbb{}(X_5=1) \\times 2 - \\mathbb{}(X_5=2) \\times 1 - \\mathbb{}(X_5=3) \\times 4\\\\ X_1,X_2,X_3 &\\sim N\\left(0,1\\right)\\\\ X_4 &\\sim \\text{Bernoulli}(1/2)\\\\ X_5 &\\sim \\text{Categorical}(1/3,1/3,1/3)\\\\ \\end{aligned} 
\\end{equation*}\\]","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"simulation","dir":"Articles","previous_headings":"Demo 1: Nonlinear Outcome Model, Heterogeneous Treatment Effect","what":"Simulation","title":"Causal Machine Learning in StochTree","text":"draw DGP defined ","code":"n <- 500 snr <- 3 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) mu_x <- mu1(X) tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x y <- E_XZ + rnorm(n, 0, 1)*(sd(E_XZ)/snr) # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"warmstart","dir":"Articles","previous_headings":"Demo 1: Nonlinear Outcome Model, Heterogeneous Treatment Effect > Sampling and Analysis","what":"Warmstart","title":"Causal Machine Learning in StochTree","text":"first simulate ensemble model \\(y \\mid X\\) using “warm-start” initialization samples (Krantsevich, , Hahn (2023)). default stochtree. 
Inspect BART samples initialized XBART warm-start Examine test set interval coverage","code":"num_gfr <- 10 num_burnin <- 0 num_mcmc <- 1000 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_warmstart <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_gfr+1):num_samples plot(rowMeans(bcf_model_warmstart$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_warmstart$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_warmstart$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 0.98"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"bart-mcmc-without-warmstart","dir":"Articles","previous_headings":"Demo 1: Nonlinear Outcome Model, Heterogeneous Treatment Effect > Sampling and Analysis","what":"BART MCMC without Warmstart","title":"Causal Machine Learning in StochTree","text":"Next, simulate ensemble model without warm-start initialization. 
Inspect BART samples burnin Examine test set interval coverage","code":"num_gfr <- 0 num_burnin <- 1000 num_mcmc <- 1000 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_root <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_burnin+1):num_samples plot(rowMeans(bcf_model_root$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_root$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_root$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 0.96"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"demo-2-linear-outcome-model-heterogeneous-treatment-effect","dir":"Articles","previous_headings":"","what":"Demo 2: Linear Outcome Model, Heterogeneous Treatment Effect","title":"Causal Machine Learning in StochTree","text":"consider following data generating process Hahn, Murray, Carvalho (2020): \\[\\begin{equation*} \\begin{aligned} y &= \\mu(X) + \\tau(X) Z + \\epsilon\\\\ \\epsilon &\\sim 
N\\left(0,\\sigma^2\\right)\\\\ \\mu(X) &= 1 + g(X) + 6 X_1 X_3\\\\ \\tau(X) &= 1 + 2 X_2 X_4\\\\ g(X) &= \\mathbb{}(X_5=1) \\times 2 - \\mathbb{}(X_5=2) \\times 1 - \\mathbb{}(X_5=3) \\times 4\\\\ X_1,X_2,X_3 &\\sim N\\left(0,1\\right)\\\\ X_4 &\\sim \\text{Bernoulli}(1/2)\\\\ X_5 &\\sim \\text{Categorical}(1/3,1/3,1/3)\\\\ \\end{aligned} \\end{equation*}\\]","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"simulation-1","dir":"Articles","previous_headings":"Demo 2: Linear Outcome Model, Heterogeneous Treatment Effect","what":"Simulation","title":"Causal Machine Learning in StochTree","text":"draw DGP defined ","code":"n <- 500 snr <- 3 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) mu_x <- mu2(X) tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x y <- E_XZ + rnorm(n, 0, 1)*(sd(E_XZ)/snr) # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"warmstart-1","dir":"Articles","previous_headings":"Demo 2: Linear Outcome Model, Heterogeneous Treatment Effect > Sampling and Analysis","what":"Warmstart","title":"Causal Machine Learning in StochTree","text":"first simulate ensemble model \\(y \\mid X\\) using “warm-start” initialization samples (Krantsevich, , 
Hahn (2023)). default stochtree. Inspect BART samples initialized XBART warm-start Examine test set interval coverage","code":"num_gfr <- 10 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_warmstart <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_gfr+1):num_samples plot(rowMeans(bcf_model_warmstart$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_warmstart$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_warmstart$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 0.9"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"bart-mcmc-without-warmstart-1","dir":"Articles","previous_headings":"Demo 2: Linear Outcome Model, Heterogeneous Treatment Effect > Sampling and Analysis","what":"BART MCMC without Warmstart","title":"Causal Machine Learning in StochTree","text":"Next, simulate ensemble model without warm-start initialization. 
Inspect BART samples burnin Examine test set interval coverage","code":"num_gfr <- 0 num_burnin <- 100 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_root <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_burnin+1):num_samples plot(rowMeans(bcf_model_root$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_root$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_root$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 0.72"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"demo-3-linear-outcome-model-homogeneous-treatment-effect","dir":"Articles","previous_headings":"","what":"Demo 3: Linear Outcome Model, Homogeneous Treatment Effect","title":"Causal Machine Learning in StochTree","text":"consider following data generating process Hahn, Murray, Carvalho (2020): \\[\\begin{equation*} \\begin{aligned} y &= \\mu(X) + \\tau(X) Z + \\epsilon\\\\ \\epsilon &\\sim N\\left(0,\\sigma^2\\right)\\\\ 
\\mu(X) &= 1 + g(X) + 6 X_1 X_3\\\\ \\tau(X) &= 3\\\\ g(X) &= \\mathbb{}(X_5=1) \\times 2 - \\mathbb{}(X_5=2) \\times 1 - \\mathbb{}(X_5=3) \\times 4\\\\ X_1,X_2,X_3 &\\sim N\\left(0,1\\right)\\\\ X_4 &\\sim \\text{Bernoulli}(1/2)\\\\ X_5 &\\sim \\text{Categorical}(1/3,1/3,1/3)\\\\ \\end{aligned} \\end{equation*}\\]","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"simulation-2","dir":"Articles","previous_headings":"Demo 3: Linear Outcome Model, Homogeneous Treatment Effect","what":"Simulation","title":"Causal Machine Learning in StochTree","text":"draw DGP defined ","code":"n <- 500 snr <- 3 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) mu_x <- mu2(X) tau_x <- tau1(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x y <- E_XZ + rnorm(n, 0, 1)*(sd(E_XZ)/snr) # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"warmstart-2","dir":"Articles","previous_headings":"Demo 3: Linear Outcome Model, Homogeneous Treatment Effect > Sampling and Analysis","what":"Warmstart","title":"Causal Machine Learning in StochTree","text":"first simulate ensemble model \\(y \\mid X\\) using “warm-start” initialization samples (Krantsevich, , Hahn (2023)). default stochtree. 
Inspect BART samples initialized XBART warm-start Examine test set interval coverage","code":"num_gfr <- 10 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_warmstart <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_gfr+1):num_samples plot(rowMeans(bcf_model_warmstart$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_warmstart$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_warmstart$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 1"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"bart-mcmc-without-warmstart-2","dir":"Articles","previous_headings":"Demo 3: Linear Outcome Model, Homogeneous Treatment Effect > Sampling and Analysis","what":"BART MCMC without Warmstart","title":"Causal Machine Learning in StochTree","text":"Next, simulate ensemble model without warm-start initialization. 
Inspect BART samples burnin Examine test set interval coverage","code":"num_gfr <- 0 num_burnin <- 100 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_root <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_burnin+1):num_samples plot(rowMeans(bcf_model_root$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_root$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_root$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 1"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"demo-4-nonlinear-outcome-model-heterogeneous-treatment-effect","dir":"Articles","previous_headings":"","what":"Demo 4: Nonlinear Outcome Model, Heterogeneous Treatment Effect","title":"Causal Machine Learning in StochTree","text":"consider following data generating process: \\[\\begin{equation*} \\begin{aligned} y &= \\mu(X) + \\tau(X) Z + \\epsilon\\\\ \\epsilon &\\sim N\\left(0,\\sigma^2\\right)\\\\ \\mu(X) &= 
\\begin{cases} -1.1 & \\text{ } X_1 > X_2\\\\ 0.9 & \\text{ } X_1 \\leq X_2 \\end{cases}\\\\ \\tau(X) &= \\frac{1}{1+\\exp(-X_3)} + \\frac{X_2}{10}\\\\ \\pi(X) &= \\Phi\\left(\\mu(X)\\right)\\\\ Z &\\sim \\text{Bernoulli}\\left(\\pi(X)\\right)\\\\ X_1,X_2,X_3 &\\sim N\\left(0,1\\right)\\\\ X_4 &\\sim N\\left(X_2,1\\right)\\\\ \\end{aligned} \\end{equation*}\\]","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"simulation-3","dir":"Articles","previous_headings":"Demo 4: Nonlinear Outcome Model, Heterogeneous Treatment Effect","what":"Simulation","title":"Causal Machine Learning in StochTree","text":"draw DGP defined ","code":"n <- 1000 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- rnorm(n,x2,1) X <- cbind(x1,x2,x3,x4) p <- ncol(X) mu <- function(x) {-1*(x[,1]>(x[,2])) + 1*(x[,1]<(x[,2])) - 0.1} tau <- function(x) {1/(1 + exp(-x[,3])) + x[,2]/10} mu_x <- mu(X) tau_x <- tau(X) pi_x <- pnorm(mu_x) Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x sigma <- diff(range(mu_x + tau_x*pi))/8 y <- E_XZ + sigma*rnorm(n) # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"warmstart-3","dir":"Articles","previous_headings":"Demo 4: Nonlinear Outcome Model, Heterogeneous Treatment Effect > Sampling and Analysis","what":"Warmstart","title":"Causal Machine Learning in StochTree","text":"first simulate ensemble model \\(y \\mid X\\) using “warm-start” 
initialization samples (Krantsevich, , Hahn (2023)). default stochtree. Inspect BART samples initialized XBART warm-start Examine test set interval coverage","code":"num_gfr <- 10 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_warmstart <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_gfr+1):num_samples plot(rowMeans(bcf_model_warmstart$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_warmstart$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_warmstart$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 0.995"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"bart-mcmc-without-warmstart-3","dir":"Articles","previous_headings":"Demo 4: Nonlinear Outcome Model, Heterogeneous Treatment Effect > Sampling and Analysis","what":"BART MCMC without Warmstart","title":"Causal Machine Learning in StochTree","text":"Next, simulate ensemble model without 
warm-start initialization. Inspect BART samples burnin Examine test set interval coverage","code":"num_gfr <- 0 num_burnin <- 100 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_root <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F ) sample_inds <- (num_burnin+1):num_samples plot(rowMeans(bcf_model_root$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_root$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ) plot_bounds <- c(min(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed)), max(c(bcf_model_root$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_root$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_root$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 0.98"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"demo-5-nonlinear-outcome-model-heterogeneous-treatment-effect-with-additive-random-effects","dir":"Articles","previous_headings":"","what":"Demo 5: Nonlinear Outcome Model, Heterogeneous Treatment Effect with Additive Random Effects","title":"Causal Machine Learning in StochTree","text":"augment simulated example Demo 1 additive random effect structure show bcf() function can estimate incorporate 
effects forest sampling procedure.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"simulation-4","dir":"Articles","previous_headings":"Demo 5: Nonlinear Outcome Model, Heterogeneous Treatment Effect with Additive Random Effects","what":"Simulation","title":"Causal Machine Learning in StochTree","text":"draw augmented “demo 1” DGP","code":"n <- 500 snr <- 3 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) mu_x <- mu1(X) tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1),nrow=2,byrow=T) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) # Split data into test and train sets test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] group_ids_test <- group_ids[test_inds] group_ids_train <- group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] rfx_term_train <- rfx_term[train_inds]"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html","id":"warmstart-4","dir":"Articles","previous_headings":"Demo 5: Nonlinear Outcome Model, Heterogeneous Treatment Effect with Additive Random Effects > Sampling and 
Analysis","what":"Warmstart","title":"Causal Machine Learning in StochTree","text":"simulate “warm-start” model (running root-MCMC BART random effects simply matter modifying code snippet setting num_gfr <- 0 num_mcmc > 0). Inspect BART samples initialized XBART warm-start Examine test set interval coverage clear causal inference much difficult presence strong covariate-dependent prognostic effects strong group-level random effects. sense, proper prior calibration three \\(\\mu\\), \\(\\tau\\) random effects models crucial.","code":"num_gfr <- 100 num_burnin <- 0 num_mcmc <- 500 num_samples <- num_gfr + num_burnin + num_mcmc bcf_model_warmstart <- bcf( X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, group_ids_train = group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, rfx_basis_test = rfx_basis_test, feature_types = c(0,0,0,1,1), num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, sample_sigma_leaf_mu = T, sample_sigma_leaf_tau = F ) sample_inds <- (num_gfr+1):num_samples plot(rowMeans(bcf_model_warmstart$mu_hat_test[,sample_inds]), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_warmstart$tau_hat_test[,sample_inds]), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_warmstart$y_hat_test[,sample_inds]), y_test, xlab = \"predicted\", ylab = \"actual\", main = \"Outcome\") abline(0,1,col=\"red\",lty=3,lwd=3) plot(rowMeans(bcf_model_warmstart$rfx_preds_test[,sample_inds]), rfx_term_test, xlab = \"predicted\", ylab = \"actual\", main = \"Random effects terms\") abline(0,1,col=\"red\",lty=3,lwd=3) sigma_observed <- var(y-E_XZ-rfx_term) plot_bounds <- c(min(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed)), 
max(c(bcf_model_warmstart$sigma2_samples[sample_inds], sigma_observed))) plot(bcf_model_warmstart$sigma2_samples[sample_inds], ylim = plot_bounds, ylab = \"sigma^2\", xlab = \"Sample\", main = \"Global variance parameter\") abline(h = sigma_observed, lty=3, lwd = 3, col = \"blue\") test_lb <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.025) test_ub <- apply(bcf_model_warmstart$tau_hat_test, 1, quantile, 0.975) cover <- ( (test_lb <= tau_x[test_inds]) & (test_ub >= tau_x[test_inds]) ) mean(cover) #> [1] 0.92"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Ensemble-Kernel.html","id":"motivation","dir":"Articles","previous_headings":"","what":"Motivation","title":"Kernel Methods from Tree Ensembles in StochTree","text":"trained tree ensemble strong --sample performance admits natural motivation “distance” two samples: shared leaf membership. number leaves ensemble 1 \\(s\\) (, tree 1 3 leaves, reserves numbers 1 - 3, turn tree 2 5 leaves, reserves numbers 4 - 8 label leaves, ). dataset \\(n\\) observations, construct matrix \\(W\\) follows:    Initialize \\(W\\) matrix zeroes \\(n\\) rows many columns leaves ensemble    Let s = 0    \\(j\\) \\(\\left\\{1,\\dots,m\\right\\}\\):       Let num_leaves number leaves tree \\(j\\)       \\(\\) \\(\\left\\{1,\\dots,n\\right\\}\\):          Let k leaf tree \\(j\\) maps observation \\(\\)          Set element \\(W_{,k+s} = 1\\)       Let s = s + num_leaves sparse matrix \\(W\\) matrix representation basis predictions ensemble (.e. integrating leaf parameters just analyzing leaf indices). ensemble \\(m\\) trees, can determine proportion trees map observation leaf computing \\(W W^T / m\\). can form basis kernel function used Gaussian process regression, demonstrate . 
begin, load stochtree package tgp package serve point reference.","code":"library(stochtree) library(tgp) library(MASS) library(mvtnorm)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Ensemble-Kernel.html","id":"demo-1-univariate-supervised-learning","dir":"Articles","previous_headings":"","what":"Demo 1: Univariate Supervised Learning","title":"Kernel Methods from Tree Ensembles in StochTree","text":"begin simulated example tgp package (Gramacy Taddy (2010)). data generating process (DGP) non-stationary single numeric covariate. define training set test set evaluate various approaches modeling sample outcome data.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Ensemble-Kernel.html","id":"traditional-gaussian-process","dir":"Articles","previous_headings":"Demo 1: Univariate Supervised Learning","what":"Traditional Gaussian Process","title":"Kernel Methods from Tree Ensembles in StochTree","text":"can use tgp package model data classical Gaussian Process. 
Assess RMSE","code":"# Generate the data X_train <- seq(0,20,length=100) X_test <- seq(0,20,length=99) y_train <- (sin(pi*X_train/5) + 0.2*cos(4*pi*X_train/5)) * (X_train <= 9.6) lin_train <- X_train>9.6; y_train[lin_train] <- -1 + X_train[lin_train]/10 y_train <- y_train + rnorm(length(y_train), sd=0.1) y_test <- (sin(pi*X_test/5) + 0.2*cos(4*pi*X_test/5)) * (X_test <= 9.6) lin_test <- X_test>9.6; y_test[lin_test] <- -1 + X_test[lin_test]/10 # Fit the GP model_gp <- bgp(X=X_train, Z=y_train, XX=X_test) plot(model_gp$ZZ.mean, y_test, xlab = \"predicted\", ylab = \"actual\", main = \"Gaussian process\") abline(0,1,lwd=2.5,lty=3,col=\"red\") sqrt(mean((model_gp$ZZ.mean - y_test)^2)) sqrt(mean((model_gp$ZZ.mean - y_test)^2)) #> [1] 0.0466081"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Ensemble-Kernel.html","id":"bart-based-gaussian-process","dir":"Articles","previous_headings":"Demo 1: Univariate Supervised Learning","what":"BART-based Gaussian process","title":"Kernel Methods from Tree Ensembles in StochTree","text":"Assess RMSE","code":"# Run BART on the data num_trees <- 200 sigma_leaf <- 1/num_trees bart_model <- bart(X_train=X_train, y_train=y_train, X_test=X_test, num_trees=num_trees) # Extract kernels needed for kriging result_kernels <- computeForestKernels(bart_model=bart_model, X_train=X_train, X_test=X_test) Sigma_11 <- result_kernels$kernel_test Sigma_12 <- result_kernels$kernel_test_train Sigma_22 <- result_kernels$kernel_train Sigma_22_inv <- ginv(Sigma_22) Sigma_21 <- t(Sigma_12) # Compute mean and covariance for the test set posterior mu_tilde <- Sigma_12 %*% Sigma_22_inv %*% y_train Sigma_tilde <- (sigma_leaf)*(Sigma_11 - Sigma_12 %*% Sigma_22_inv %*% Sigma_21) # Sample from f(X_test) | X_test, X_train, f(X_train) gp_samples <- mvtnorm::rmvnorm(1000, mean = mu_tilde, sigma = Sigma_tilde) # Compute posterior mean predictions for f(X_test) yhat_mean_test <- colMeans(gp_samples) plot(yhat_mean_test, y_test, xlab = \"predicted\", ylab 
= \"actual\", main = \"BART Gaussian process\") abline(0,1,lwd=2.5,lty=3,col=\"red\") sqrt(mean((yhat_mean_test - y_test)^2)) #> [1] 0.09765312"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Ensemble-Kernel.html","id":"demo-2-multivariate-supervised-learning","dir":"Articles","previous_headings":"","what":"Demo 2: Multivariate Supervised Learning","title":"Kernel Methods from Tree Ensembles in StochTree","text":"proceed simulated “Friedman” dataset, implemented tgp.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Ensemble-Kernel.html","id":"traditional-gaussian-process-1","dir":"Articles","previous_headings":"Demo 2: Multivariate Supervised Learning","what":"Traditional Gaussian Process","title":"Kernel Methods from Tree Ensembles in StochTree","text":"can use tgp package model data classical Gaussian Process. Assess RMSE","code":"# Generate the data, add many \"noise variables\" n <- 100 friedman.df <- friedman.1.data(n=n) train_inds <- sort(sample(1:n, floor(0.8*n), replace = F)) test_inds <- (1:n)[!((1:n) %in% train_inds)] X <- as.matrix(friedman.df)[,1:10] X <- cbind(X, matrix(runif(n*10), ncol = 10)) y <- as.matrix(friedman.df)[,12] + rnorm(n,0,1)*(sd(as.matrix(friedman.df)[,11])/2) X_train <- X[train_inds,] X_test <- X[test_inds,] y_train <- y[train_inds] y_test <- y[test_inds] # Fit the GP model_gp <- bgp(X=X_train, Z=y_train, XX=X_test) plot(model_gp$ZZ.mean, y_test, xlab = \"predicted\", ylab = \"actual\", main = \"Gaussian process\") abline(0,1,lwd=2.5,lty=3,col=\"red\") sqrt(mean((model_gp$ZZ.mean - y_test)^2)) #> [1] 5.023593"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Ensemble-Kernel.html","id":"bart-based-gaussian-process-1","dir":"Articles","previous_headings":"Demo 2: Multivariate Supervised Learning","what":"BART-based Gaussian process","title":"Kernel Methods from Tree Ensembles in StochTree","text":"Assess RMSE use case BART kernel classical kriging perhaps unclear without 
empirical investigation, see later vignette kernel approach can beneficial causal inference applications.","code":"# Run BART on the data num_trees <- 200 sigma_leaf <- 1/num_trees bart_model <- bart(X_train=X_train, y_train=y_train, X_test=X_test, num_trees=num_trees) # Extract kernels needed for kriging result_kernels <- computeForestKernels(bart_model=bart_model, X_train=X_train, X_test=X_test) Sigma_11 <- result_kernels$kernel_test Sigma_12 <- result_kernels$kernel_test_train Sigma_22 <- result_kernels$kernel_train Sigma_22_inv <- ginv(Sigma_22) Sigma_21 <- t(Sigma_12) # Compute mean and covariance for the test set posterior mu_tilde <- Sigma_12 %*% Sigma_22_inv %*% y_train Sigma_tilde <- (sigma_leaf)*(Sigma_11 - Sigma_12 %*% Sigma_22_inv %*% Sigma_21) # Sample from f(X_test) | X_test, X_train, f(X_train) gp_samples <- mvtnorm::rmvnorm(1000, mean = mu_tilde, sigma = Sigma_tilde) # Compute posterior mean predictions for f(X_test) yhat_mean_test <- colMeans(gp_samples) plot(yhat_mean_test, y_test, xlab = \"predicted\", ylab = \"actual\", main = \"BART Gaussian process\") abline(0,1,lwd=2.5,lty=3,col=\"red\") sqrt(mean((yhat_mean_test - y_test)^2)) #> [1] 5.198576"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"motivation","dir":"Articles","previous_headings":"","what":"Motivation","title":"Prototype Interface in StochTree","text":"functions bart() bcf() provide simple performant interfaces supervised learning / causal inference, stochtree also offers access many “low-level” data structures typically implemented C++. low-level interface designed performance even simplicity — rather intent provide “prototype” interface C++ code doesn’t require modifying C++. 
illustrate prototype interface might useful, consider classic BART algorithm:    INPUT: \\(y\\), \\(X\\), \\(\\tau\\), \\(\\nu\\), \\(\\lambda\\), \\(\\alpha\\), \\(\\beta\\)    OUTPUT: \\(m\\) samples decision forest \\(k\\) trees global variance parameter \\(\\sigma^2\\)    Initialize \\(\\sigma^2\\) via default data-dependent calibration exercise    Initialize “forest 0” \\(k\\) trees single root node, referring tree \\(j\\)’s prediction vector \\(f_{0,j}\\)    Compute residual \\(r = y - \\sum_{j=1}^k f_{0,j}\\)    \\(\\) \\(\\left\\{1,\\dots,m\\right\\}\\):       Initialize forest \\(\\) forest \\(-1\\)       \\(j\\) \\(\\left\\{1,\\dots,k\\right\\}\\):          Add predictions tree \\(j\\) residual: \\(r = r + f_{,j}\\)          Update tree \\(j\\) via Metropolis-Hastings \\(r\\) \\(X\\) data tree priors depending (\\(\\tau\\), \\(\\sigma^2\\), \\(\\alpha\\), \\(\\beta\\))          Sample leaf node parameters tree \\(j\\) via Gibbs (leaf node prior \\(N\\left(0,\\tau\\right)\\))          Subtract (updated) predictions tree \\(j\\) residual: \\(r = r - f_{,j}\\)       Sample \\(\\sigma^2\\) via Gibbs (prior \\(IG(\\nu/2,\\nu\\lambda/2)\\)) algorithm conceptually simple, much core computation carried low-level languages C C++ tree data structure. result, changes algorithm, supporting heteroskedasticity (Pratola et al. (2020)), categorical outcomes (Murray (2021)) causal effect estimation (Hahn, Murray, Carvalho (2020)) require modifying low-level code. prototype interface exposes core components loop R level, thus making possible interchange C++ computation steps like “update tree \\(j\\) via Metropolis-Hastings” R computation custom variance model, user-specified additive mean model components, . 
begin, load stochtree package","code":"library(stochtree)"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"simulation","dir":"Articles","previous_headings":"Demo 1: Supervised Learning","what":"Simulation","title":"Prototype Interface in StochTree","text":"Simulate simple partitioned linear model","code":"# Generate the data n <- 500 p_X <- 10 p_W <- 1 X <- matrix(runif(n*p_X), ncol = p_X) W <- matrix(runif(n*p_W), ncol = p_W) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-3*W[,1]) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-1*W[,1]) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (1*W[,1]) + ((0.75 <= X[,1]) & (1 > X[,1])) * (3*W[,1]) ) y <- f_XW + rnorm(n, 0, 1) # Standardize outcome y_bar <- mean(y) y_std <- sd(y) resid <- (y-y_bar)/y_std"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"sampling","dir":"Articles","previous_headings":"Demo 1: Supervised Learning","what":"Sampling","title":"Prototype Interface in StochTree","text":"Set parameters inform forest variance parameter samplers Initialize R-level access C++ classes needed sample model Prepare run sampler Run grow--root sampler “warm-start” BART Pick last GFR forest (associated global variance / leaf scale parameters) MCMC sampler Predict rescale samples","code":"alpha <- 0.9 beta <- 1.25 min_samples_leaf <- 1 num_trees <- 100 cutpoint_grid_size = 100 global_variance_init = 1. tau_init = 0.5 leaf_prior_scale = matrix(c(tau_init), ncol = 1) nu <- 4 lambda <- 0.5 a_leaf <- 2. 
b_leaf <- 0.5 leaf_regression <- T feature_types <- as.integer(rep(0, p_X)) # 0 = numeric var_weights <- rep(1/p_X, p_X) # Data if (leaf_regression) { forest_dataset <- createForestDataset(X, W) outcome_model_type <- 1 } else { forest_dataset <- createForestDataset(X) outcome_model_type <- 0 } outcome <- createOutcome(resid) # Random number generator (std::mt19937) rng <- createRNG() # Sampling data structures forest_model <- createForestModel(forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf) # Container of forest samples if (leaf_regression) { forest_samples <- createForestContainer(num_trees, 1, F) } else { forest_samples <- createForestContainer(num_trees, 1, T) } num_warmstart <- 10 num_mcmc <- 100 num_samples <- num_warmstart + num_mcmc global_var_samples <- c(global_variance_init, rep(0, num_samples)) leaf_scale_samples <- c(tau_init, rep(0, num_samples)) for (i in 1:num_warmstart) { # Sample forest forest_model$sample_one_iteration( forest_dataset, outcome, forest_samples, rng, feature_types, outcome_model_type, leaf_prior_scale, var_weights, global_var_samples[i], cutpoint_grid_size, gfr = T ) # Sample global variance parameter global_var_samples[i+1] <- sample_sigma2_one_iteration( outcome, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` leaf_scale_samples[i+1] <- sample_tau_one_iteration( forest_samples, rng, a_leaf, b_leaf, i-1 ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] } for (i in (num_warmstart+1):num_samples) { # Sample forest forest_model$sample_one_iteration( forest_dataset, outcome, forest_samples, rng, feature_types, outcome_model_type, leaf_prior_scale, var_weights, global_var_samples[i], cutpoint_grid_size, gfr = F ) # Sample global variance parameter global_var_samples[i+1] <- sample_sigma2_one_iteration( outcome, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` leaf_scale_samples[i+1] <- sample_tau_one_iteration( forest_samples, rng, 
a_leaf, b_leaf, i-1 ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] } # Forest predictions preds <- forest_samples$predict(forest_dataset)*y_std + y_bar # Global error variance sigma_samples <- sqrt(global_var_samples)*y_std"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"results","dir":"Articles","previous_headings":"Demo 1: Supervised Learning","what":"Results","title":"Prototype Interface in StochTree","text":"Inspect initial samples obtained via “grow--root” (Hahn (2023)) Inspect BART samples obtained “warm-starting”","code":"plot(sigma_samples[1:num_warmstart], ylab=\"sigma\") plot(rowMeans(preds[,1:num_warmstart]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(sigma_samples[(num_warmstart+1):num_samples], ylab=\"sigma\") plot(rowMeans(preds[,(num_warmstart+1):num_samples]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"demo-2-supervised-learning-with-additive-random-effects","dir":"Articles","previous_headings":"","what":"Demo 2: Supervised Learning with Additive Random Effects","title":"Prototype Interface in StochTree","text":"build example add simple “random effects” structure: every observation either group 1 group 2 random group intercept (simulated quite strong, underscoring need random effects modeling).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"simulation-1","dir":"Articles","previous_headings":"Demo 2: Supervised Learning with Additive Random Effects","what":"Simulation","title":"Prototype Interface in StochTree","text":"Simulate partitioned linear model simple additive group random effect structure","code":"# Generate the data n <- 500 p_X <- 10 p_W <- 1 X <- matrix(runif(n*p_X), ncol = p_X) W <- matrix(runif(n*p_W), ncol = p_W) group_ids 
<- rep(c(1,2), n %/% 2) rfx_coefs <- c(-5, 5) rfx_basis <- rep(1, n) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-3*W[,1]) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-1*W[,1]) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (1*W[,1]) + ((0.75 <= X[,1]) & (1 > X[,1])) * (3*W[,1]) ) rfx_term <- rfx_coefs[group_ids] * rfx_basis y <- f_XW + rfx_term + rnorm(n, 0, 1) # Standardize outcome y_bar <- mean(y) y_std <- sd(y) resid <- (y-y_bar)/y_std"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"sampling-1","dir":"Articles","previous_headings":"Demo 2: Supervised Learning with Additive Random Effects","what":"Sampling","title":"Prototype Interface in StochTree","text":"Set parameters inform forest variance parameter samplers Set parameters inform random effects samplers Initialize R-level access C++ classes needed sample model Prepare run sampler Run grow--root sampler “warm-start” BART Pick last GFR forest (associated global variance / leaf scale parameters) MCMC sampler Predict rescale samples","code":"alpha <- 0.9 beta <- 1.25 min_samples_leaf <- 1 num_trees <- 100 cutpoint_grid_size = 100 global_variance_init = 1. tau_init = 0.5 leaf_prior_scale = matrix(c(tau_init), ncol = 1) nu <- 4 lambda <- 0.5 a_leaf <- 2. 
b_leaf <- 0.5 leaf_regression <- T feature_types <- as.integer(rep(0, p_X)) # 0 = numeric var_weights <- rep(1/p_X, p_X) alpha_init <- c(1) xi_init <- matrix(c(1,1),1,2) sigma_alpha_init <- matrix(c(1),1,1) sigma_xi_init <- matrix(c(1),1,1) sigma_xi_shape <- 1 sigma_xi_scale <- 1 # Data if (leaf_regression) { forest_dataset <- createForestDataset(X, W) outcome_model_type <- 1 } else { forest_dataset <- createForestDataset(X) outcome_model_type <- 0 } outcome <- createOutcome(resid) # Random number generator (std::mt19937) rng <- createRNG() # Sampling data structures forest_model <- createForestModel(forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf) # Container of forest samples if (leaf_regression) { forest_samples <- createForestContainer(num_trees, 1, F) } else { forest_samples <- createForestContainer(num_trees, 1, T) } # Random effects dataset rfx_basis <- as.matrix(rfx_basis) group_ids <- as.integer(group_ids) rfx_dataset <- createRandomEffectsDataset(group_ids, rfx_basis) # Random effects details num_groups <- length(unique(group_ids)) num_components <- ncol(rfx_basis) # Random effects tracker rfx_tracker <- createRandomEffectsTracker(group_ids) # Random effects model rfx_model <- createRandomEffectsModel(num_components, num_groups) rfx_model$set_working_parameter(alpha_init) rfx_model$set_group_parameters(xi_init) rfx_model$set_working_parameter_cov(sigma_alpha_init) rfx_model$set_group_parameter_cov(sigma_xi_init) rfx_model$set_variance_prior_shape(sigma_xi_shape) rfx_model$set_variance_prior_scale(sigma_xi_scale) # Random effect samples rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) num_warmstart <- 10 num_mcmc <- 100 num_samples <- num_warmstart + num_mcmc global_var_samples <- c(global_variance_init, rep(0, num_samples)) leaf_scale_samples <- c(tau_init, rep(0, num_samples)) for (i in 1:num_warmstart) { # Sample forest forest_model$sample_one_iteration( forest_dataset, outcome, forest_samples, 
rng, feature_types, outcome_model_type, leaf_prior_scale, var_weights, global_var_samples[i], cutpoint_grid_size, gfr = T ) # Sample global variance parameter global_var_samples[i+1] <- sample_sigma2_one_iteration( outcome, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` leaf_scale_samples[i+1] <- sample_tau_one_iteration( forest_samples, rng, a_leaf, b_leaf, i-1 ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] # Sample random effects model rfx_model$sample_random_effect(rfx_dataset, outcome, rfx_tracker, rfx_samples, global_var_samples[i+1], rng) } for (i in (num_warmstart+1):num_samples) { # Sample forest forest_model$sample_one_iteration( forest_dataset, outcome, forest_samples, rng, feature_types, outcome_model_type, leaf_prior_scale, var_weights, global_var_samples[i], cutpoint_grid_size, gfr = F ) # Sample global variance parameter global_var_samples[i+1] <- sample_sigma2_one_iteration( outcome, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` leaf_scale_samples[i+1] <- sample_tau_one_iteration( forest_samples, rng, a_leaf, b_leaf, i-1 ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] # Sample random effects model rfx_model$sample_random_effect(rfx_dataset, outcome, rfx_tracker, rfx_samples, global_var_samples[i+1], rng) } # Forest predictions forest_preds <- forest_samples$predict(forest_dataset)*y_std + y_bar # Random effects predictions rfx_preds <- rfx_samples$predict(group_ids, rfx_basis)*y_std # Overall predictions preds <- forest_preds + rfx_preds # Global error variance sigma_samples <- sqrt(global_var_samples)*y_std"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"results-1","dir":"Articles","previous_headings":"Demo 2: Supervised Learning with Additive Random Effects","what":"Results","title":"Prototype Interface in StochTree","text":"Inspect initial samples obtained via grow--root additive random effects model Inspect BART 
samples obtained “warm-starting” plus additive random effects model Now inspect samples BART forest alone (without considering random effect predictions)","code":"plot(sigma_samples[1:num_warmstart], ylab=\"sigma\") plot(rowMeans(preds[,1:num_warmstart]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(sigma_samples[(num_warmstart+1):num_samples], ylab=\"sigma\") plot(rowMeans(preds[,(num_warmstart+1):num_samples]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(rowMeans(forest_preds[,(num_warmstart+1):num_samples]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"demo-3-supervised-learning-with-additive-multi-component-random-effects","dir":"Articles","previous_headings":"","what":"Demo 3: Supervised Learning with Additive Multi-Component Random Effects","title":"Prototype Interface in StochTree","text":"build example, case allowing random intercept regression coefficient (pre-specified basis) group (1 2).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"simulation-2","dir":"Articles","previous_headings":"Demo 3: Supervised Learning with Additive Multi-Component Random Effects","what":"Simulation","title":"Prototype Interface in StochTree","text":"Simulate partitioned linear model simple additive group random effect structure","code":"# Generate the data n <- 500 p_X <- 10 p_W <- 1 X <- matrix(runif(n*p_X), ncol = p_X) W <- matrix(runif(n*p_W), ncol = p_W) group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-5, -3, 5, 3),nrow=2,byrow=T) rfx_basis <- cbind(1, runif(n, -1, 1)) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-3*W[,1]) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-1*W[,1]) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (1*W[,1]) + ((0.75 <= X[,1]) & (1 > X[,1])) * 
(3*W[,1]) ) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) y <- f_XW + rfx_term + rnorm(n, 0, 1) # Standardize outcome y_bar <- mean(y) y_std <- sd(y) resid <- (y-y_bar)/y_std"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"sampling-2","dir":"Articles","previous_headings":"Demo 3: Supervised Learning with Additive Multi-Component Random Effects","what":"Sampling","title":"Prototype Interface in StochTree","text":"Set parameters inform forest variance parameter samplers Set parameters inform random effects samplers Initialize R-level access C++ classes needed sample model Prepare run sampler Run grow--root sampler “warm-start” BART Pick last GFR forest (associated global variance / leaf scale parameters) MCMC sampler Predict rescale samples","code":"alpha <- 0.9 beta <- 1.25 min_samples_leaf <- 1 num_trees <- 100 cutpoint_grid_size = 100 global_variance_init = 1. tau_init = 0.5 leaf_prior_scale = matrix(c(tau_init), ncol = 1) nu <- 4 lambda <- 0.5 a_leaf <- 2. 
b_leaf <- 0.5 leaf_regression <- T feature_types <- as.integer(rep(0, p_X)) # 0 = numeric var_weights <- rep(1/p_X, p_X) alpha_init <- c(1,0) xi_init <- matrix(c(1,0,1,0),2,2) sigma_alpha_init <- diag(1,2,2) sigma_xi_init <- diag(1,2,2) sigma_xi_shape <- 1 sigma_xi_scale <- 1 # Data if (leaf_regression) { forest_dataset <- createForestDataset(X, W) outcome_model_type <- 1 } else { forest_dataset <- createForestDataset(X) outcome_model_type <- 0 } outcome <- createOutcome(resid) # Random number generator (std::mt19937) rng <- createRNG() # Sampling data structures forest_model <- createForestModel(forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf) # Container of forest samples if (leaf_regression) { forest_samples <- createForestContainer(num_trees, 1, F) } else { forest_samples <- createForestContainer(num_trees, 1, T) } # Random effects dataset rfx_basis <- as.matrix(rfx_basis) group_ids <- as.integer(group_ids) rfx_dataset <- createRandomEffectsDataset(group_ids, rfx_basis) # Random effects details num_groups <- length(unique(group_ids)) num_components <- ncol(rfx_basis) # Random effects tracker rfx_tracker <- createRandomEffectsTracker(group_ids) # Random effects model rfx_model <- createRandomEffectsModel(num_components, num_groups) rfx_model$set_working_parameter(alpha_init) rfx_model$set_group_parameters(xi_init) rfx_model$set_working_parameter_cov(sigma_alpha_init) rfx_model$set_group_parameter_cov(sigma_xi_init) rfx_model$set_variance_prior_shape(sigma_xi_shape) rfx_model$set_variance_prior_scale(sigma_xi_scale) # Random effect samples rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) num_warmstart <- 10 num_mcmc <- 100 num_samples <- num_warmstart + num_mcmc global_var_samples <- c(global_variance_init, rep(0, num_samples)) leaf_scale_samples <- c(tau_init, rep(0, num_samples)) for (i in 1:num_warmstart) { # Sample forest forest_model$sample_one_iteration( forest_dataset, outcome, forest_samples, 
rng, feature_types, outcome_model_type, leaf_prior_scale, var_weights, global_var_samples[i], cutpoint_grid_size, gfr = T ) # Sample global variance parameter global_var_samples[i+1] <- sample_sigma2_one_iteration( outcome, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` leaf_scale_samples[i+1] <- sample_tau_one_iteration( forest_samples, rng, a_leaf, b_leaf, i-1 ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] # Sample random effects model rfx_model$sample_random_effect(rfx_dataset, outcome, rfx_tracker, rfx_samples, global_var_samples[i+1], rng) } for (i in (num_warmstart+1):num_samples) { # Sample forest forest_model$sample_one_iteration( forest_dataset, outcome, forest_samples, rng, feature_types, outcome_model_type, leaf_prior_scale, var_weights, global_var_samples[i], cutpoint_grid_size, gfr = F ) # Sample global variance parameter global_var_samples[i+1] <- sample_sigma2_one_iteration( outcome, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` leaf_scale_samples[i+1] <- sample_tau_one_iteration( forest_samples, rng, a_leaf, b_leaf, i-1 ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] # Sample random effects model rfx_model$sample_random_effect(rfx_dataset, outcome, rfx_tracker, rfx_samples, global_var_samples[i+1], rng) } # Forest predictions forest_preds <- forest_samples$predict(forest_dataset)*y_std + y_bar # Random effects predictions rfx_preds <- rfx_samples$predict(group_ids, rfx_basis)*y_std # Overall predictions preds <- forest_preds + rfx_preds # Global error variance sigma_samples <- sqrt(global_var_samples)*y_std"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"results-2","dir":"Articles","previous_headings":"Demo 3: Supervised Learning with Additive Multi-Component Random Effects","what":"Results","title":"Prototype Interface in StochTree","text":"Inspect initial samples obtained via grow--root additive random effects 
model Inspect BART samples obtained “warm-starting” plus additive random effects model Now inspect samples BART forest alone (without considering random effect predictions)","code":"plot(sigma_samples[1:num_warmstart], ylab=\"sigma\") plot(rowMeans(preds[,1:num_warmstart]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(sigma_samples[(num_warmstart+1):num_samples], ylab=\"sigma\") plot(rowMeans(preds[,(num_warmstart+1):num_samples]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(rowMeans(forest_preds[,(num_warmstart+1):num_samples]), y, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\") abline(0,1,col=\"red\",lty=2,lwd=2.5)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"demo-4-causal-inference","dir":"Articles","previous_headings":"","what":"Demo 4: Causal Inference","title":"Prototype Interface in StochTree","text":"show implement Bayesian Causal Forest (BCF) model Hahn, Murray, Carvalho (2020) using stochtree’s prototype API, including demoing non-trivial sampling step done R level.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"background","dir":"Articles","previous_headings":"Demo 4: Causal Inference","what":"Background","title":"Prototype Interface in StochTree","text":"supervised learning case previous demo conceptually simple, motivate causal effect estimation task additional notation. Let \\(y\\) refer continuous outcome interest, \\(Z\\) refer binary treatment, \\(X\\) set covariates may influence \\(Y\\), \\(Z\\), . 
\\(X\\) exhaustive set covariates influence \\(Z\\) \\(Y\\), can specific \\(Y\\) terms causal model (see example Pearl (2009)) \\[\\begin{equation*} \\begin{aligned} Y &= F(Z, X, \\epsilon_Y) \\end{aligned} \\end{equation*}\\] \\(\\epsilon_Y\\) outcome specific random noise \\(F\\) function generates \\(Y\\) (many cases, \\(F\\) can thought inverse CDF conditional \\(X\\) \\(Z\\)). “potential outcomes” (see Imbens Rubin (2015)) can recovered \\(Y^1 = F(1, X, \\epsilon_Y)\\) \\(Y^0 = F(0, X, \\epsilon_Y)\\). causal outcome model can decomposed “mean” “error” terms \\[\\begin{equation*} \\begin{aligned} Y &= \\mu(X) + Z\\tau(X) + \\left[\\eta(X) + Z\\delta(X)\\right]\\\\ \\mu(X) &= \\mathbb{E}_{\\epsilon_Y}\\left[F(0, X, \\epsilon_Y)\\right]\\\\ \\tau(X) &= \\mathbb{E}_{\\epsilon_Y}\\left[F(1, X, \\epsilon_Y) - F(0, X, \\epsilon_Y)\\right]\\\\ \\eta(X) &= F(0, X, \\epsilon_Y) - \\mathbb{E}_{\\epsilon_Y}\\left[F(0, X, \\epsilon_Y)\\right]\\\\ \\delta(X) &= F(1, X, \\epsilon_Y) - F(0, X, \\epsilon_Y) - \\mathbb{E}_{\\epsilon_Y}\\left[F(1, X, \\epsilon_Y) - F(0, X, \\epsilon_Y)\\right] \\end{aligned} \\end{equation*}\\] \\(\\tau(X)\\) precisely conditional average treatment effect (CATE) estimand. Unfortunately, functional form \\(F\\) unavailable analysis, \\(\\tau(X)\\) derived. flexible, regularized nonparametrics enter picture, aim estimate \\(\\mu(X)\\) \\(\\tau(X)\\) data.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"bayesian-causal-forest-bcf","dir":"Articles","previous_headings":"Demo 4: Causal Inference > Background","what":"Bayesian Causal Forest (BCF)","title":"Prototype Interface in StochTree","text":"BCF estimates \\(\\mu(X)\\) \\(\\tau(X)\\) using separate BART forests term. Furthermore, rather rely common implicit coding \\(Z\\) 0 control observations 1 treated observations, consider coding control observations parameter \\(b_0\\) treated observations parameter \\(b_1\\). 
Placing \\(N(0,1/2)\\) prior \\(b_z\\), essentially redefines outcome model \\[\\begin{equation*} \\begin{aligned} y &= \\mu(X) + \\tau(X) f(Z) + \\epsilon\\\\ f(Z) &= b_0(1-Z) + b_1 Z\\\\ \\epsilon &\\sim N\\left(0, \\sigma^2\\right)\\\\ b_0, b_1 &\\sim N\\left(0, 1/2\\right) \\end{aligned} \\end{equation*}\\] Updating \\(b_z\\) requires additional Gibbs step, derive . Conditioning sampled forests \\(\\mu\\) \\(\\tau\\), essentially regressing \\(y - \\mu(Z)\\) \\(\\left[(1-Z)\\tau(X), Z\\tau(X)\\right]\\) closed form posterior \\[\\begin{equation*} \\begin{aligned} b_0 \\mid y, X, \\mu,\\tau &\\sim N\\left(\\frac{s_{y\\tau,0}}{s_{\\tau\\tau,0} + 2\\sigma^2}, \\frac{\\sigma^2}{s_{\\tau\\tau,0} + 2\\sigma^2}\\right)\\\\ b_1 \\mid y, X, \\mu,\\tau &\\sim N\\left(\\frac{s_{y\\tau,1}}{s_{\\tau\\tau,1} + 2\\sigma^2}, \\frac{\\sigma^2}{s_{\\tau\\tau,1} + 2\\sigma^2}\\right) \\end{aligned} \\end{equation*}\\] \\(s_{y\\tau,z} = \\sum_{: Z_i = z} (y_i - \\mu(X_i))\\tau(X_i)\\) \\(s_{\\tau\\tau,z} = \\sum_{: Z_i = z} \\tau(X_i)\\tau(X_i)\\).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"simulation-3","dir":"Articles","previous_headings":"Demo 4: Causal Inference","what":"Simulation","title":"Prototype Interface in StochTree","text":"simulated causal DGP mirrors nonlinear, heterogeneous treatment effect DGP presented Hahn, Murray, Carvalho (2020).","code":"n <- 500 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) g <- function(x) {ifelse(x[,5]==1,2,ifelse(x[,5]==2,-1,4))} mu1 <- function(x) {1+g(x)+x[,1]*x[,3]} mu2 <- function(x) {1+g(x)+6*abs(x[,3]-1)} tau1 <- function(x) {rep(3,nrow(x))} tau2 <- function(x) {1+2*x[,2]*x[,4]} mu_x <- mu1(X) tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 4 y <- E_XZ + rnorm(n, 0, 
1)*(sd(E_XZ)/snr) # Standardize outcome y_bar <- mean(y) y_std <- sd(y) resid <- (y-y_bar)/y_std"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"sampling-3","dir":"Articles","previous_headings":"Demo 4: Causal Inference","what":"Sampling","title":"Prototype Interface in StochTree","text":"Set parameters inform forest variance parameter samplers Prepare run sampler (now must specify initial values \\(b_0\\) \\(b_1\\), choose -1/2 1/2 instead 0 1). Initialize R-level access C++ classes needed sample model Run grow--root sampler “warm-start” BART, also updating adaptive coding parameter \\(b_0\\) \\(b_1\\) Pick last GFR forest (associated global variance / leaf scale parameters) MCMC sampler Predict rescale samples","code":"# Mu forest alpha_mu <- 0.95 beta_mu <- 2.0 min_samples_leaf_mu <- 5 num_trees_mu <- 250 cutpoint_grid_size_mu = 100 tau_init_mu = 1/num_trees_mu leaf_prior_scale_mu = matrix(c(tau_init_mu), ncol = 1) a_leaf_mu <- 3. b_leaf_mu <- var(resid)/(num_trees_mu) leaf_regression_mu <- F sigma_leaf_mu <- var(resid)/(num_trees_mu) current_leaf_scale_mu <- as.matrix(sigma_leaf_mu) # Tau forest alpha_tau <- 0.25 beta_tau <- 3.0 min_samples_leaf_tau <- 5 num_trees_tau <- 50 cutpoint_grid_size_tau = 100 a_leaf_tau <- 3. 
b_leaf_tau <- var(resid)/(2*num_trees_tau) leaf_regression_tau <- T sigma_leaf_tau <- var(resid)/(2*num_trees_tau) current_leaf_scale_tau <- as.matrix(sigma_leaf_tau) # Common parameters nu <- 3 sigma2hat <- (sigma(lm(resid~X)))^2 quantile_cutoff <- 0.9 if (is.null(lambda)) { lambda <- (sigma2hat*qgamma(1-quantile_cutoff,nu))/nu } sigma2 <- sigma2hat current_sigma2 <- sigma2 # Sampling composition num_gfr <- 20 num_burnin <- 0 num_mcmc <- 100 num_samples <- num_gfr + num_burnin + num_mcmc # Sigma^2 samples global_var_samples <- rep(0, num_samples) # Adaptive coding parameter samples b_0_samples <- rep(0, num_samples) b_1_samples <- rep(0, num_samples) b_0 <- -0.5 b_1 <- 0.5 current_b_0 <- b_0 current_b_1 <- b_1 tau_basis <- (1-Z)*current_b_0 + Z*current_b_1 # Data X_mu <- cbind(X, pi_x) X_tau <- X feature_types <- c(0,0,0,1,1) feature_types_mu <- as.integer(c(feature_types,0)) feature_types_tau <- as.integer(feature_types) variable_weights_mu = rep(1/ncol(X_mu), ncol(X_mu)) variable_weights_tau = rep(1/ncol(X_tau), ncol(X_tau)) forest_dataset_mu <- createForestDataset(X_mu) forest_dataset_tau <- createForestDataset(X_tau, tau_basis) outcome <- createOutcome(resid) # Random number generator (std::mt19937) rng <- createRNG() # Sampling data structures forest_model_mu <- createForestModel( forest_dataset_mu, feature_types_mu, num_trees_mu, nrow(X_mu), alpha_mu, beta_mu, min_samples_leaf_mu ) forest_model_tau <- createForestModel( forest_dataset_tau, feature_types_tau, num_trees_tau, nrow(X_tau), alpha_tau, beta_tau, min_samples_leaf_tau ) # Container of forest samples forest_samples_mu <- createForestContainer(num_trees_mu, 1, T) forest_samples_tau <- createForestContainer(num_trees_tau, 1, F) # Initialize the leaves of each tree in the prognostic forest forest_samples_mu$set_root_leaves(0, mean(resid) / num_trees_mu) forest_samples_mu$update_residual( forest_dataset_mu, outcome, forest_model_mu, F, 0, F ) # Initialize the leaves of each tree in the treatment effect 
forest forest_samples_tau$set_root_leaves(0, 0.) forest_samples_tau$update_residual( forest_dataset_tau, outcome, forest_model_tau, T, 0, F ) if (num_gfr > 0){ for (i in 1:num_gfr) { # Sample the prognostic forest forest_model_mu$sample_one_iteration( forest_dataset_mu, outcome, forest_samples_mu, rng, feature_types_mu, 0, current_leaf_scale_mu, variable_weights_mu, current_sigma2, cutpoint_grid_size, gfr = T, pre_initialized = T ) # Sample variance parameters (if requested) global_var_samples[i] <- sample_sigma2_one_iteration( outcome, rng, nu, lambda ) current_sigma2 <- global_var_samples[i] # Sample the treatment forest forest_model_tau$sample_one_iteration( forest_dataset_tau, outcome, forest_samples_tau, rng, feature_types_tau, 1, current_leaf_scale_tau, variable_weights_tau, current_sigma2, cutpoint_grid_size, gfr = T, pre_initialized = T ) # Sample adaptive coding parameters mu_x_raw <- forest_samples_mu$predict_raw_single_forest(forest_dataset_mu, i-1) tau_x_raw <- forest_samples_tau$predict_raw_single_forest(forest_dataset_tau, i-1) s_tt0 <- sum(tau_x_raw*tau_x_raw*(Z==0)) s_tt1 <- sum(tau_x_raw*tau_x_raw*(Z==1)) partial_resid_mu <- resid - mu_x_raw s_ty0 <- sum(tau_x_raw*partial_resid_mu*(Z==0)) s_ty1 <- sum(tau_x_raw*partial_resid_mu*(Z==1)) current_b_0 <- rnorm(1, (s_ty0/(s_tt0 + 2*current_sigma2)), sqrt(current_sigma2/(s_tt0 + 2*current_sigma2))) current_b_1 <- rnorm(1, (s_ty1/(s_tt1 + 2*current_sigma2)), sqrt(current_sigma2/(s_tt1 + 2*current_sigma2))) tau_basis <- (1-Z)*current_b_0 + Z*current_b_1 forest_dataset_tau$update_basis(tau_basis) b_0_samples[i] <- current_b_0 b_1_samples[i] <- current_b_1 # Sample variance parameters (if requested) global_var_samples[i] <- sample_sigma2_one_iteration(outcome, rng, nu, lambda) current_sigma2 <- global_var_samples[i] } } if (num_burnin + num_mcmc > 0) { for (i in (num_gfr+1):num_samples) { # Sample the prognostic forest forest_model_mu$sample_one_iteration( forest_dataset_mu, outcome, forest_samples_mu, rng, 
feature_types_mu, 0, current_leaf_scale_mu, variable_weights_mu, current_sigma2, cutpoint_grid_size, gfr = F, pre_initialized = T ) # Sample global variance parameter global_var_samples[i] <- sample_sigma2_one_iteration(outcome, rng, nu, lambda) current_sigma2 <- global_var_samples[i] # Sample the treatment forest forest_model_tau$sample_one_iteration( forest_dataset_tau, outcome, forest_samples_tau, rng, feature_types_tau, 1, current_leaf_scale_tau, variable_weights_tau, current_sigma2, cutpoint_grid_size, gfr = F, pre_initialized = T ) # Sample coding parameters mu_x_raw <- forest_samples_mu$predict_raw_single_forest(forest_dataset_mu, i-1) tau_x_raw <- forest_samples_tau$predict_raw_single_forest(forest_dataset_tau, i-1) s_tt0 <- sum(tau_x_raw*tau_x_raw*(Z==0)) s_tt1 <- sum(tau_x_raw*tau_x_raw*(Z==1)) partial_resid_mu <- resid - mu_x_raw s_ty0 <- sum(tau_x_raw*partial_resid_mu*(Z==0)) s_ty1 <- sum(tau_x_raw*partial_resid_mu*(Z==1)) current_b_0 <- rnorm(1, (s_ty0/(s_tt0 + 2*current_sigma2)), sqrt(current_sigma2/(s_tt0 + 2*current_sigma2))) current_b_1 <- rnorm(1, (s_ty1/(s_tt1 + 2*current_sigma2)), sqrt(current_sigma2/(s_tt1 + 2*current_sigma2))) tau_basis <- (1-Z)*current_b_0 + Z*current_b_1 forest_dataset_tau$update_basis(tau_basis) b_0_samples[i] <- current_b_0 b_1_samples[i] <- current_b_1 # Sample global variance parameter global_var_samples[i] <- sample_sigma2_one_iteration(outcome, rng, nu, lambda) current_sigma2 <- global_var_samples[i] } } # Forest predictions mu_hat <- forest_samples_mu$predict(forest_dataset_mu)*y_std + y_bar tau_hat_raw <- forest_samples_tau$predict_raw(forest_dataset_tau) tau_hat <- t(t(tau_hat_raw) * (b_1_samples - b_0_samples))*y_std y_hat <- mu_hat + tau_hat * as.numeric(Z) # Global error variance sigma2_samples <- global_var_samples*(y_std^2)"},{"path":"https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html","id":"results-3","dir":"Articles","previous_headings":"Demo 4: Causal 
Inference","what":"Results","title":"Prototype Interface in StochTree","text":"Inspect XBART results Inspect warm start BART results Inspect “adaptive coding” parameters \\(b_0\\) \\(b_1\\).","code":"plot(sigma2_samples[1:num_gfr], ylab=\"sigma^2\") plot(rowMeans(mu_hat[,1:num_gfr]), mu_x, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\", main = \"prognostic term\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(rowMeans(tau_hat[,1:num_gfr]), tau_x, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\", main = \"treatment effect term\") abline(0,1,col=\"red\",lty=2,lwd=2.5) mean((rowMeans(tau_hat[,1:num_gfr]) - tau_x)^2) #> [1] 0.3165563 plot(sigma_samples[(num_gfr+1):num_samples], ylab=\"sigma^2\") plot(rowMeans(mu_hat[,(num_gfr+1):num_samples]), mu_x, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\", main = \"prognostic term\") abline(0,1,col=\"red\",lty=2,lwd=2.5) plot(rowMeans(tau_hat[,(num_gfr+1):num_samples]), tau_x, pch=16, cex=0.75, xlab = \"pred\", ylab = \"actual\", main = \"treatment effect term\") abline(0,1,col=\"red\",lty=2,lwd=2.5) mean((rowMeans(tau_hat[,(num_gfr+1):num_samples]) - tau_x)^2) #> [1] 0.5352369 plot(b_0_samples, col = \"blue\", ylab = \"Coding parameter draws\", ylim = c(min(min(b_0_samples), min(b_1_samples)), max(max(b_0_samples), max(b_1_samples)))) points(b_1_samples, col = \"orange\") legend(\"topleft\", legend = c(\"b_0\", \"b_1\"), col = c(\"blue\", \"orange\"), pch = c(1,1))"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Drew Herren. Author, maintainer. Richard Hahn. Author. Jared Murray. Author. Carlos Carvalho. Author. Jingyu . Author.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Herren D, Hahn R, Murray J, Carvalho C, J (2024). 
stochtree: Stochastic tree ensembles (XBART BART) supervised learning causal inference. R package version 0.0.0.9000, https://stochastictree.github.io/stochtree-r/.","code":"@Manual{, title = {stochtree: Stochastic tree ensembles (XBART and BART) for supervised learning and causal inference}, author = {Drew Herren and Richard Hahn and Jared Murray and Carlos Carvalho and Jingyu He}, year = {2024}, note = {R package version 0.0.0.9000}, url = {https://stochastictree.github.io/stochtree-r/}, }"},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/index.html","id":"getting-started","dir":"","previous_headings":"","what":"Getting started","title":"Stochastic tree ensembles (XBART and BART) for supervised learning and causal inference","text":"package can installed R via","code":"remotes::install_github(\"StochasticTree/stochtree-r\")"},{"path":"https://stochastictree.github.io/stochtree-r/reference/BART.html","id":null,"dir":"Reference","previous_headings":"","what":"Run the BART algorithm for supervised learning. — bart","title":"Run the BART algorithm for supervised learning. — bart","text":"Run BART algorithm supervised learning.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/BART.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Run the BART algorithm for supervised learning. 
— bart","text":"","code":"bart( X_train, y_train, W_train = NULL, group_ids_train = NULL, rfx_basis_train = NULL, X_test = NULL, W_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL, feature_types = rep(0, ncol(X_train)), variable_weights = rep(1/ncol(X_train), ncol(X_train)), cutpoint_grid_size = 100, tau_init = NULL, alpha = 0.95, beta = 2, min_samples_leaf = 5, leaf_model = 0, nu = 3, lambda = NULL, a_leaf = 3, b_leaf = NULL, q = 0.9, sigma2_init = NULL, num_trees = 200, num_gfr = 5, num_burnin = 0, num_mcmc = 100, sample_sigma = T, sample_tau = T, random_seed = -1 )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/BART.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Run the BART algorithm for supervised learning. — bart","text":"X_train Covariates used split trees ensemble. y_train Outcome modeled ensemble. W_train (Optional) Bases used define regression model y ~ W leaf regression tree. default, BART assumes constant leaf node parameters, implicitly regressing constant basis ones (.e. y ~ 1). group_ids_train (Optional) Group labels used additive random effects model. rfx_basis_train (Optional) Basis \"random-slope\" regression additive random effects model. group_ids_train provided regression basis, intercept-random effects model estimated. X_test (Optional) Test set covariates used define \"sample\" evaluation data. W_test (Optional) Test set bases used define \"sample\" evaluation data. test set optional, structure provided test set must match training set (.e. X_train W_train provided, test set must consist X_test W_test number columns). group_ids_test (Optional) Test set group labels used additive random effects model. currently support (plan near future), test set evaluation group labels training set. rfx_basis_test (Optional) Test set basis \"random-slope\" regression additive random effects model. 
feature_types Vector length ncol(X_train) indicating \"type\" covariates (0 = numeric, 1 = ordered categorical, 2 = unordered categorical). Default: rep(0,ncol(X_train)). variable_weights Vector length ncol(X_train) indicating \"weight\" placed variable sampling purposes. Default: rep(1/ncol(X_train),ncol(X_train)). cutpoint_grid_size Maximum size \"grid\" potential cutpoints consider. Default: 100. tau_init Starting value leaf node scale parameter. Calibrated internally 1/num_trees set . alpha Prior probability splitting tree depth 0. Tree split prior combines alpha beta via alpha*(1+node_depth)^-beta. beta Exponent decreases split probabilities nodes depth > 0. Tree split prior combines alpha beta via alpha*(1+node_depth)^-beta. min_samples_leaf Minimum allowable size leaf, terms training samples. Default: 5. leaf_model Integer indicating leaf model, 0 = constant Gaussian prior, 1 = univariate regression Gaussian prior, 2 = multivariate regression Gaussian prior. W_train ignored set 0. Default: 0. nu Shape parameter IG(nu, nu*lambda) global error variance model. Default: 3. lambda Component scale parameter IG(nu, nu*lambda) global error variance prior. specified, calibrated Sparapani et al (2021). a_leaf Shape parameter IG(a_leaf, b_leaf) leaf node parameter variance model. Default: 3. b_leaf Scale parameter IG(a_leaf, b_leaf) leaf node parameter variance model. Calibrated internally 0.5/num_trees set . q Quantile used calibrated lambda Sparapani et al (2021). Default: 0.9. sigma2_init Starting value global variance parameter. Calibrated internally Sparapani et al (2021) set . num_trees Number trees ensemble. Default: 200. num_gfr Number \"warm-start\" iterations run using grow--root algorithm (Hahn, 2021). Default: 5. num_burnin Number \"burn-\" iterations MCMC sampler. Default: 0. num_mcmc Number \"retained\" iterations MCMC sampler. Default: 100. sample_sigma Whether update sigma^2 global error variance parameter based IG(nu, nu*lambda). Default: T. 
sample_tau Whether update tau leaf scale variance parameter based IG(a_leaf, b_leaf). set true leaf_model=2. Default: T. random_seed Integer parameterizing C++ random number generator. specified, C++ random number generator seeded according std::random_device.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/BART.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Run the BART algorithm for supervised learning. — bart","text":"List sampling outputs wrapper around sampled forests (can used -memory prediction new data, serialized JSON disk).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/BART.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Run the BART algorithm for supervised learning. — bart","text":"","code":"n <- 100 p <- 5 X <- matrix(runif(n*p), ncol = p) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) ) noise_sd <- 1 y <- f_XW + rnorm(n, 0, noise_sd) test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test, leaf_model = 0) # plot(rowMeans(bart_model$yhat_test), y_test, xlab = \"predicted\", ylab = \"actual\") # abline(0,1,col=\"red\",lty=3,lwd=3)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/CppRNG.html","id":null,"dir":"Reference","previous_headings":"","what":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","title":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","text":"Persists C++ random number generator throughout R session ensure 
reproducibility given random seed. seed provided, C++ random number generator initialized using std::random_device.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/CppRNG.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","text":"rng_ptr External pointer C++ std::mt19937 class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/CppRNG.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","text":"CppRNG$new()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/CppRNG.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","text":"Create new CppRNG object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/CppRNG.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","text":"","code":"CppRNG$new(random_seed = -1)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/CppRNG.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","text":"random_seed (Optional) random seed sampling","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/CppRNG.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that wraps a C++ random number generator (for reproducibility) — CppRNG","text":"new CppRNG 
object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":null,"dir":"Reference","previous_headings":"","what":"Dataset used to sample a forest — ForestDataset","title":"Dataset used to sample a forest — ForestDataset","text":"dataset consists three matrices / vectors: covariates, bases, variance weights. basis vector variance weights optional.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Dataset used to sample a forest — ForestDataset","text":"data_ptr External pointer C++ ForestDataset class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Dataset used to sample a forest — ForestDataset","text":"ForestDataset$new() ForestDataset$update_basis() ForestDataset$num_observations() ForestDataset$num_covariates() ForestDataset$num_basis() ForestDataset$has_basis() ForestDataset$has_variance_weights()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Dataset used to sample a forest — ForestDataset","text":"Create new ForestDataset object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a forest — ForestDataset","text":"","code":"ForestDataset$new(covariates, basis = NULL, variance_weights = NULL)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Dataset used to sample a forest — ForestDataset","text":"covariates Matrix 
covariates basis (Optional) Matrix bases used define leaf regression variance_weights (Optional) Vector observation-specific variance weights","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a forest — ForestDataset","text":"new ForestDataset object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"method-update-basis-","dir":"Reference","previous_headings":"","what":"Method update_basis()","title":"Dataset used to sample a forest — ForestDataset","text":"Update basis matrix dataset","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"usage-1","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a forest — ForestDataset","text":"","code":"ForestDataset$update_basis(basis)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"arguments-1","dir":"Reference","previous_headings":"","what":"Arguments","title":"Dataset used to sample a forest — ForestDataset","text":"basis Updated matrix bases used define leaf regression","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"method-num-observations-","dir":"Reference","previous_headings":"","what":"Method num_observations()","title":"Dataset used to sample a forest — ForestDataset","text":"Return number observations ForestDataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"usage-2","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a forest — 
ForestDataset","text":"","code":"ForestDataset$num_observations()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"returns-1","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a forest — ForestDataset","text":"Observation count","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"method-num-covariates-","dir":"Reference","previous_headings":"","what":"Method num_covariates()","title":"Dataset used to sample a forest — ForestDataset","text":"Return number covariates ForestDataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"usage-3","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a forest — ForestDataset","text":"","code":"ForestDataset$num_covariates()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"returns-2","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a forest — ForestDataset","text":"Covariate count","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"method-num-basis-","dir":"Reference","previous_headings":"","what":"Method num_basis()","title":"Dataset used to sample a forest — ForestDataset","text":"Return number bases ForestDataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"usage-4","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a forest — ForestDataset","text":"","code":"ForestDataset$num_basis()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"returns-3","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a forest — ForestDataset","text":"Basis 
count","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"method-has-basis-","dir":"Reference","previous_headings":"","what":"Method has_basis()","title":"Dataset used to sample a forest — ForestDataset","text":"Whether dataset basis matrix","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"usage-5","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a forest — ForestDataset","text":"","code":"ForestDataset$has_basis()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"returns-4","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a forest — ForestDataset","text":"True basis matrix loaded, false otherwise","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"method-has-variance-weights-","dir":"Reference","previous_headings":"","what":"Method has_variance_weights()","title":"Dataset used to sample a forest — ForestDataset","text":"Whether dataset variance weights","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"usage-6","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a forest — ForestDataset","text":"","code":"ForestDataset$has_variance_weights()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html","id":"returns-5","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a forest — ForestDataset","text":"True variance weights loaded, false otherwise","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html","id":null,"dir":"Reference","previous_headings":"","what":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. 
— ForestKernel","title":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. — ForestKernel","text":"Computes leaf membership internally sparse matrix also calculates (dense) kernel based sparse matrix C++.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. — ForestKernel","text":"forest_kernel_ptr External pointer C++ StochTree::ForestKernel class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. — ForestKernel","text":"ForestKernel$new() ForestKernel$compute_leaf_indices() ForestKernel$compute_kernel()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. — ForestKernel","text":"Create new ForestKernel object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. 
— ForestKernel","text":"","code":"ForestKernel$new()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. — ForestKernel","text":"new ForestKernel object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html","id":"method-compute-leaf-indices-","dir":"Reference","previous_headings":"","what":"Method compute_leaf_indices()","title":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. — ForestKernel","text":"Compute leaf indices tree ensemble every observation dataset. Stores result internally, can extracted class via call get_leaf_indices.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html","id":"usage-1","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. — ForestKernel","text":"","code":"ForestKernel$compute_leaf_indices( covariates_train, covariates_test = NULL, forest_container, forest_num )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. 
— ForestKernel","text":"covariates_train Matrix training set covariates compute leaf indices covariates_test (Optional) Matrix test set covariates compute leaf indices forest_container Object type ForestSamples forest_num Index forest forest_container assessed","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html","id":"returns-1","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. — ForestKernel","text":"List vectors. covariates_test = NULL list one element (train set leaf indices), otherwise list two elements (train test set leaf indices).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html","id":"method-compute-kernel-","dir":"Reference","previous_headings":"","what":"Method compute_kernel()","title":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. — ForestKernel","text":"Compute kernel implied tree ensemble. function calls compute_leaf_indices, necessary call . compute_leaf_indices exposed class level allow extracting vector leaf indices ensemble directly R.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html","id":"usage-2","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. 
— ForestKernel","text":"","code":"ForestKernel$compute_kernel( covariates_train, covariates_test = NULL, forest_container, forest_num )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html","id":"arguments-1","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. — ForestKernel","text":"covariates_train Matrix training set covariates assess ensemble kernel covariates_test (Optional) Matrix test set covariates assess ensemble kernel forest_container Object type ForestSamples forest_num Index forest forest_container assessed","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html","id":"returns-2","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that provides functionality for statistical kernel definition and computation based on shared leaf membership of observations in a tree ensemble. — ForestKernel","text":"List matrices. covariates_test = NULL, list contains one n_train x n_train matrix, n_train = nrow(covariates_train). matrix kernel defined W_train %*% t(W_train) W_train matrix n_train rows many columns total leaves ensemble. 
covariates_test NULL, list contains two matrices defined W_test %*% t(W_train) W_test %*% t(W_test).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":null,"dir":"Reference","previous_headings":"","what":"Class that defines and samples a forest model — ForestModel","title":"Class that defines and samples a forest model — ForestModel","text":"Hosts C++ data structures needed sample ensemble decision trees, exposes functionality run forest sampler (using either MCMC grow--root algorithm).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Class that defines and samples a forest model — ForestModel","text":"tracker_ptr External pointer C++ ForestTracker class tree_prior_ptr External pointer C++ TreePrior class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Class that defines and samples a forest model — ForestModel","text":"ForestModel$new() ForestModel$sample_one_iteration()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Class that defines and samples a forest model — ForestModel","text":"Create new ForestModel object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that defines and samples a forest model — ForestModel","text":"","code":"ForestModel$new( forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf 
)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that defines and samples a forest model — ForestModel","text":"forest_dataset ForestDataset object, used initialize forest sampling data structures feature_types Feature types (integers 0 = numeric, 1 = ordered categorical, 2 = unordered categorical) num_trees Number trees forest sampled n Number observations forest_dataset alpha Root node split probability tree prior beta Depth prior penalty tree prior min_samples_leaf Minimum number samples tree leaf","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that defines and samples a forest model — ForestModel","text":"new ForestModel object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"method-sample-one-iteration-","dir":"Reference","previous_headings":"","what":"Method sample_one_iteration()","title":"Class that defines and samples a forest model — ForestModel","text":"Run single iteration forest sampling algorithm (MCMC GFR)","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"usage-1","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that defines and samples a forest model — ForestModel","text":"","code":"ForestModel$sample_one_iteration( forest_dataset, residual, forest_samples, rng, feature_types, leaf_model_int, leaf_model_scale, variable_weights, global_scale, cutpoint_grid_size = 500, gfr = T, pre_initialized = F )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestModel.html","id":"arguments-1","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that defines and samples a forest model — ForestModel","text":"forest_dataset Dataset used sample 
forest residual Outcome used sample forest forest_samples Container forest samples rng Wrapper around C++ random number generator feature_types Vector specifying type p covariates forest_dataset (0 = numeric, 1 = ordered categorical, 2 = unordered categorical) leaf_model_int Integer specifying leaf model type (0 = constant leaf, 1 = univariate leaf regression, 2 = multivariate leaf regression) leaf_model_scale Scale parameter used leaf node model (q x q matrix q dimensionality basis >1 leaf_model_int = 2) variable_weights Vector specifying sampling probability p covariates forest_dataset global_scale Global variance parameter cutpoint_grid_size (Optional) Number unique cutpoints consider (default: 500, currently used GFR = TRUE) gfr (Optional) Whether forest sampled using \"grow--root\" (GFR) algorithm pre_initialized (Optional) Whether leaves pre-initialized outside sampling loop (samples drawn). multi-forest implementations like BCF, true, though single-forest supervised learning implementation, can let C++ initialization. 
Default: F.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":null,"dir":"Reference","previous_headings":"","what":"Class that stores draws from an random ensemble of decision trees — ForestSamples","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Wrapper around C++ container tree ensembles","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"forest_container_ptr External pointer C++ ForestContainer class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"ForestSamples$new() ForestSamples$predict() ForestSamples$predict_raw() ForestSamples$predict_raw_single_forest() ForestSamples$set_root_leaves() ForestSamples$update_residual() ForestSamples$save_json() ForestSamples$load_json() ForestSamples$num_samples() ForestSamples$num_trees() ForestSamples$output_dimension()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Create new ForestContainer object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$new(num_trees, output_dimension = 1, 
is_leaf_constant = F)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"num_trees Number trees output_dimension Dimensionality outcome model is_leaf_constant Whether leaf constant","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"new ForestContainer object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-predict-","dir":"Reference","previous_headings":"","what":"Method predict()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Predict every tree ensemble every sample forest_dataset","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-1","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$predict(forest_dataset)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments-1","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"forest_dataset ForestDataset R class","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"returns-1","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"matrix predictions many rows forest_dataset many columns samples 
ForestContainer","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-predict-raw-","dir":"Reference","previous_headings":"","what":"Method predict_raw()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Predict \"raw\" leaf values (without multiplied basis) every tree ensemble every sample forest_dataset","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-2","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$predict_raw(forest_dataset)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments-2","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"forest_dataset ForestDataset R class","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"returns-2","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Array predictions observation forest_dataset sample ForestSamples class prediction dimensionality forests' leaf model. case constant leaf model univariate leaf regression, array two-dimensional (number observations, number forest samples). 
case multivariate leaf regression, array three-dimension (number observations, leaf model dimension, number samples).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-predict-raw-single-forest-","dir":"Reference","previous_headings":"","what":"Method predict_raw_single_forest()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Predict \"raw\" leaf values (without multiplied basis) specific forest every sample forest_dataset","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-3","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$predict_raw_single_forest(forest_dataset, forest_num)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments-3","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"forest_dataset ForestDataset R class forest_num Index forest sample within container","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"returns-3","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"matrix predictions many rows forest_dataset many columns samples ForestContainer","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-set-root-leaves-","dir":"Reference","previous_headings":"","what":"Method set_root_leaves()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Set constant predicted value every tree ensemble. 
Stops program tree root node.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-4","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$set_root_leaves(forest_num, leaf_value)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments-4","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"forest_num Index forest sample within container. leaf_value Constant leaf value(s) fixed tree ensemble indexed forest_num. Can either single number vector, depending forest's leaf dimension.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-update-residual-","dir":"Reference","previous_headings":"","what":"Method update_residual()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Updates residual based predictions forest","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-5","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$update_residual( dataset, outcome, forest_model, requires_basis, forest_num, add )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments-5","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"dataset ForestDataset object storing covariates bases given forest outcome Outcome object storing residuals updated based forest predictions forest_model ForestModel object storing tracking 
structures used training / sampling requires_basis Whether forest requires basis prediction forest_num Index forest used update residuals add Whether forest predictions added subtracted residuals","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-save-json-","dir":"Reference","previous_headings":"","what":"Method save_json()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Store trees metadata ForestDataset class json file","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-6","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$save_json(json_filename)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments-6","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"json_filename Name output json file (must end \".json\")","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-load-json-","dir":"Reference","previous_headings":"","what":"Method load_json()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Load trees metadata ensemble json file. 
Note trees metadata already present ForestDataset class overwritten.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-7","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$load_json(json_filename)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"arguments-7","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"json_filename Name model input json file (must end \".json\")","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-num-samples-","dir":"Reference","previous_headings":"","what":"Method num_samples()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Return number samples ForestContainer object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-8","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$num_samples()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"returns-4","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Sample count","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-num-trees-","dir":"Reference","previous_headings":"","what":"Method num_trees()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Return number trees ensemble ForestContainer 
object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-9","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$num_trees()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"returns-5","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Tree count","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"method-output-dimension-","dir":"Reference","previous_headings":"","what":"Method output_dimension()","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Return output dimension trees ForestContainer object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"usage-10","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"","code":"ForestSamples$output_dimension()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/ForestSamples.html","id":"returns-6","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that stores draws from an random ensemble of decision trees — ForestSamples","text":"Leaf node parameter size","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/Outcome.html","id":null,"dir":"Reference","previous_headings":"","what":"Outcome / partial residual used to sample an additive model. — Outcome","title":"Outcome / partial residual used to sample an additive model. — Outcome","text":"outcome class wrapper around vector (mutable) outcomes ML tasks (supervised learning, causal inference). 
additive tree ensemble sampled, outcome used sample specific model term \"partial residual\" consisting outcome minus predictions every model term (trees, group random effects, etc...).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/Outcome.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Outcome / partial residual used to sample an additive model. — Outcome","text":"data_ptr External pointer C++ Outcome class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/Outcome.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Outcome / partial residual used to sample an additive model. — Outcome","text":"Outcome$new()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/Outcome.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Outcome / partial residual used to sample an additive model. — Outcome","text":"Create new Outcome object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/Outcome.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Outcome / partial residual used to sample an additive model. — Outcome","text":"","code":"Outcome$new(outcome)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/Outcome.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Outcome / partial residual used to sample an additive model. — Outcome","text":"outcome Vector outcome values","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/Outcome.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Outcome / partial residual used to sample an additive model. 
— Outcome","text":"new Outcome object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":null,"dir":"Reference","previous_headings":"","what":"Class that wraps the ","title":"Class that wraps the ","text":"Coordinates various C++ random effects classes persists needed prediction / serialization","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Class that wraps the ","text":"rfx_container_ptr External pointer C++ StochTree::RandomEffectsContainer class label_mapper_ptr External pointer C++ StochTree::LabelMapper class training_group_ids Unique vector group IDs training dataset","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Class that wraps the ","text":"RandomEffectSamples$new() RandomEffectSamples$predict() RandomEffectSamples$extract_parameter_samples() RandomEffectSamples$extract_label_mapping()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Class that wraps the ","text":"Create new RandomEffectSamples object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that wraps the ","text":"","code":"RandomEffectSamples$new(num_components, num_groups, random_effects_tracker)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that wraps the ","text":"num_components Number \"components\" bases defining 
random effects regression num_groups Number random effects groups random_effects_tracker Object type RandomEffectsTracker","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that wraps the ","text":"new RandomEffectSamples object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"method-predict-","dir":"Reference","previous_headings":"","what":"Method predict()","title":"Class that wraps the ","text":"Predict random effects observation implied rfx_group_ids rfx_basis. random effects model \"intercept-\" rfx_basis vector ones size length(rfx_group_ids).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"usage-1","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that wraps the ","text":"","code":"RandomEffectSamples$predict(rfx_group_ids, rfx_basis)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"arguments-1","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that wraps the ","text":"rfx_group_ids Indices random effects groups prediction set rfx_basis Basis used random effects prediction","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"returns-1","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that wraps the ","text":"Matrix many rows observations provided many columns samples drawn model.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"method-extract-parameter-samples-","dir":"Reference","previous_headings":"","what":"Method extract_parameter_samples()","title":"Class that wraps the ","text":"Extract random effects parameters sampled. 
\"redundant parameterization\" Gelman et al (2008), includes four parameters: alpha (\"working parameter\" shared across every group), xi (\"group parameter\" sampled separately group), beta (product alpha xi, corresponds overall group-level random effects), sigma (group-independent prior variance component xi).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"usage-2","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that wraps the ","text":"","code":"RandomEffectSamples$extract_parameter_samples()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"returns-2","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that wraps the ","text":"List arrays. alpha array dimension (num_components, num_samples) simply vector num_components = 1. xi beta arrays dimension (num_components, num_groups, num_samples) simply matrix num_components = 1. sigma array dimension (num_components, num_samples) simply vector num_components = 1.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"method-extract-label-mapping-","dir":"Reference","previous_headings":"","what":"Method extract_label_mapping()","title":"Class that wraps the ","text":"Convert mapping group IDs random effect components indices C++ R native format","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"usage-3","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that wraps the ","text":"","code":"RandomEffectSamples$extract_label_mapping()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectSamples.html","id":"returns-3","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that wraps the ","text":"List mapping group ID random effect 
components.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":null,"dir":"Reference","previous_headings":"","what":"Dataset used to sample a random effects model — RandomEffectsDataset","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"dataset consists three matrices / vectors: group labels, bases, variance weights. Variance weights optional.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"data_ptr External pointer C++ RandomEffectsDataset class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"RandomEffectsDataset$new() RandomEffectsDataset$num_observations() RandomEffectsDataset$has_group_labels() RandomEffectsDataset$has_basis() RandomEffectsDataset$has_variance_weights()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"Create new RandomEffectsDataset object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"","code":"RandomEffectsDataset$new(group_labels, basis, variance_weights = 
NULL)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"group_labels Vector group labels basis Matrix bases used define random effects regression (intercept-model, pass array ones) variance_weights (Optional) Vector observation-specific variance weights","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"new RandomEffectsDataset object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"method-num-observations-","dir":"Reference","previous_headings":"","what":"Method num_observations()","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"Return number observations RandomEffectsDataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"usage-1","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"","code":"RandomEffectsDataset$num_observations()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"returns-1","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"Observation count","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"method-has-group-labels-","dir":"Reference","previous_headings":"","what":"Method has_group_labels()","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"Whether 
dataset group label indices","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"usage-2","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"","code":"RandomEffectsDataset$has_group_labels()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"returns-2","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"True group label vector loaded, false otherwise","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"method-has-basis-","dir":"Reference","previous_headings":"","what":"Method has_basis()","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"Whether dataset basis matrix","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"usage-3","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"","code":"RandomEffectsDataset$has_basis()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"returns-3","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"True basis matrix loaded, false otherwise","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"method-has-variance-weights-","dir":"Reference","previous_headings":"","what":"Method has_variance_weights()","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"Whether dataset variance 
weights","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"usage-4","dir":"Reference","previous_headings":"","what":"Usage","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"","code":"RandomEffectsDataset$has_variance_weights()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsDataset.html","id":"returns-4","dir":"Reference","previous_headings":"","what":"Returns","title":"Dataset used to sample a random effects model — RandomEffectsDataset","text":"True variance weights loaded, false otherwise","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":null,"dir":"Reference","previous_headings":"","what":"The core ","title":"The core ","text":"Stores current model state, prior parameters, procedures sampling conditional posterior parameter.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"The core ","text":"rfx_model_ptr External pointer C++ StochTree::RandomEffectsModel class num_groups Number groups random effects model num_components Number components (.e. 
dimension basis) random effects model","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"The core ","text":"RandomEffectsModel$new() RandomEffectsModel$sample_random_effect() RandomEffectsModel$set_working_parameter() RandomEffectsModel$set_group_parameters() RandomEffectsModel$set_working_parameter_cov() RandomEffectsModel$set_group_parameter_cov() RandomEffectsModel$set_variance_prior_shape() RandomEffectsModel$set_variance_prior_scale()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"The core ","text":"Create new RandomEffectsModel object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$new(num_components, num_groups)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"num_components Number \"components\" bases defining random effects regression num_groups Number random effects groups","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"The core ","text":"new RandomEffectsModel object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-sample-random-effect-","dir":"Reference","previous_headings":"","what":"Method sample_random_effect()","title":"The core ","text":"Sample random effects 
model.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage-1","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$sample_random_effect( rfx_dataset, residual, rfx_tracker, rfx_samples, global_variance, rng )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments-1","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"rfx_dataset Object type RandomEffectsDataset residual Object type Outcome rfx_tracker Object type RandomEffectsTracker rfx_samples Object type RandomEffectSamples global_variance Scalar global variance parameter rng Object type CppRNG","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns-1","dir":"Reference","previous_headings":"","what":"Returns","title":"The core ","text":"None","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-set-working-parameter-","dir":"Reference","previous_headings":"","what":"Method set_working_parameter()","title":"The core ","text":"Set value \"working parameter.\" typically used initialization, also used interrupt override sampler.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage-2","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$set_working_parameter(value)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments-2","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"value Parameter input","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns-2","dir":"Reference","previous_headings":"","what":"Returns","title":"The core 
","text":"None","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-set-group-parameters-","dir":"Reference","previous_headings":"","what":"Method set_group_parameters()","title":"The core ","text":"Set value \"group parameters.\" typically used initialization, also used interrupt override sampler.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage-3","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$set_group_parameters(value)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments-3","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"value Parameter input","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns-3","dir":"Reference","previous_headings":"","what":"Returns","title":"The core ","text":"None","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-set-working-parameter-cov-","dir":"Reference","previous_headings":"","what":"Method set_working_parameter_cov()","title":"The core ","text":"Set value working parameter covariance. 
typically used initialization, also used interrupt override sampler.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage-4","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$set_working_parameter_cov(value)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments-4","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"value Parameter input","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns-4","dir":"Reference","previous_headings":"","what":"Returns","title":"The core ","text":"None","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-set-group-parameter-cov-","dir":"Reference","previous_headings":"","what":"Method set_group_parameter_cov()","title":"The core ","text":"Set value group parameter covariance. 
typically used initialization, also used interrupt override sampler.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage-5","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$set_group_parameter_cov(value)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments-5","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"value Parameter input","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns-5","dir":"Reference","previous_headings":"","what":"Returns","title":"The core ","text":"None","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-set-variance-prior-shape-","dir":"Reference","previous_headings":"","what":"Method set_variance_prior_shape()","title":"The core ","text":"Set shape parameter group parameter variance prior.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage-6","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$set_variance_prior_shape(value)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments-6","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"value Parameter input","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns-6","dir":"Reference","previous_headings":"","what":"Returns","title":"The core ","text":"None","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"method-set-variance-prior-scale-","dir":"Reference","previous_headings":"","what":"Method set_variance_prior_scale()","title":"The core 
","text":"Set shape parameter group parameter variance prior.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"usage-7","dir":"Reference","previous_headings":"","what":"Usage","title":"The core ","text":"","code":"RandomEffectsModel$set_variance_prior_scale(value)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"arguments-7","dir":"Reference","previous_headings":"","what":"Arguments","title":"The core ","text":"value Parameter input","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsModel.html","id":"returns-7","dir":"Reference","previous_headings":"","what":"Returns","title":"The core ","text":"None","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsTracker.html","id":null,"dir":"Reference","previous_headings":"","what":"Class that defines a ","title":"Class that defines a ","text":"Stores mapping every observation group index, mapping group indices training sample observations available group, predictions observation.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsTracker.html","id":"public-fields","dir":"Reference","previous_headings":"","what":"Public fields","title":"Class that defines a ","text":"rfx_tracker_ptr External pointer C++ StochTree::RandomEffectsTracker class","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsTracker.html","id":"public-methods","dir":"Reference","previous_headings":"","what":"Public methods","title":"Class that defines a ","text":"RandomEffectsTracker$new()","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsTracker.html","id":"method-new-","dir":"Reference","previous_headings":"","what":"Method new()","title":"Class that defines a ","text":"Create new RandomEffectsTracker 
object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsTracker.html","id":"usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Class that defines a ","text":"","code":"RandomEffectsTracker$new(rfx_group_indices)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsTracker.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Class that defines a ","text":"rfx_group_indices Integer indices indicating groups used define random effects","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/RandomEffectsTracker.html","id":"returns","dir":"Reference","previous_headings":"","what":"Returns","title":"Class that defines a ","text":"new RandomEffectsTracker object.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/bcf.html","id":null,"dir":"Reference","previous_headings":"","what":"Run the Bayesian Causal Forest (BCF) algorithm for regularized causal effect estimation. — bcf","title":"Run the Bayesian Causal Forest (BCF) algorithm for regularized causal effect estimation. — bcf","text":"Run Bayesian Causal Forest (BCF) algorithm regularized causal effect estimation.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/bcf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Run the Bayesian Causal Forest (BCF) algorithm for regularized causal effect estimation. 
— bcf","text":"","code":"bcf( X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NULL, rfx_basis_train = NULL, X_test = NULL, Z_test = NULL, pi_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL, feature_types = rep(0, ncol(X_train)), cutpoint_grid_size = 100, sigma_leaf_mu = NULL, sigma_leaf_tau = NULL, alpha_mu = 0.95, alpha_tau = 0.25, beta_mu = 2, beta_tau = 3, min_samples_leaf_mu = 5, min_samples_leaf_tau = 5, nu = 3, lambda = NULL, a_leaf_mu = 3, a_leaf_tau = 3, b_leaf_mu = NULL, b_leaf_tau = NULL, q = 0.9, sigma2 = NULL, num_trees_mu = 250, num_trees_tau = 50, num_gfr = 5, num_burnin = 0, num_mcmc = 100, sample_sigma_global = T, sample_sigma_leaf_mu = T, sample_sigma_leaf_tau = T, propensity_covariate = \"mu\", adaptive_coding = T, b_0 = -0.5, b_1 = 0.5, random_seed = -1 )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/bcf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Run the Bayesian Causal Forest (BCF) algorithm for regularized causal effect estimation. — bcf","text":"X_train Covariates used split trees ensemble. Z_train Vector (continuous binary) treatment assignments. y_train Outcome modeled ensemble. pi_train (Optional) Vector propensity scores. provided, estimated data. group_ids_train (Optional) Group labels used additive random effects model. rfx_basis_train (Optional) Basis \"random-slope\" regression additive random effects model. group_ids_train provided regression basis, intercept-random effects model estimated. X_test (Optional) Test set covariates used define \"sample\" evaluation data. Z_test (Optional) Test set (continuous binary) treatment assignments. pi_test (Optional) Vector propensity scores. provided, estimated data. group_ids_test (Optional) Test set group labels used additive random effects model. currently support (plan near future), test set evaluation group labels training set. 
rfx_basis_test (Optional) Test set basis \"random-slope\" regression additive random effects model. feature_types Vector length ncol(X_train) indicating \"type\" covariates (0 = numeric, 1 = ordered categorical, 2 = unordered categorical). Default: rep(0,ncol(X_train)). cutpoint_grid_size Maximum size \"grid\" potential cutpoints consider. Default: 100. sigma_leaf_mu Starting value leaf node scale parameter prognostic forest. Calibrated internally 2/num_trees_mu set . sigma_leaf_tau Starting value leaf node scale parameter treatment effect forest. Calibrated internally 1/num_trees_tau set . alpha_mu Prior probability splitting tree depth 0 prognostic forest. Tree split prior combines alpha beta via alpha*(1+node_depth)^-beta. Default: 0.95. alpha_tau Prior probability splitting tree depth 0 treatment effect forest. Tree split prior combines alpha beta via alpha*(1+node_depth)^-beta. Default: 0.25. beta_mu Exponent decreases split probabilities nodes depth > 0 prognostic forest. Tree split prior combines alpha beta via alpha*(1+node_depth)^-beta. Default: 2.0. beta_tau Exponent decreases split probabilities nodes depth > 0 treatment effect forest. Tree split prior combines alpha beta via alpha*(1+node_depth)^-beta. Default: 3.0. min_samples_leaf_mu Minimum allowable size leaf, terms training samples, prognostic forest. Default: 5. min_samples_leaf_tau Minimum allowable size leaf, terms training samples, treatment effect forest. Default: 5. nu Shape parameter IG(nu, nu*lambda) global error variance model. Default: 3. lambda Component scale parameter IG(nu, nu*lambda) global error variance prior. specified, calibrated Sparapani et al (2021). a_leaf_mu Shape parameter IG(a_leaf, b_leaf) leaf node parameter variance model prognostic forest. Default: 3. a_leaf_tau Shape parameter IG(a_leaf, b_leaf) leaf node parameter variance model treatment effect forest. Default: 3. b_leaf_mu Scale parameter IG(a_leaf, b_leaf) leaf node parameter variance model prognostic forest. 
Calibrated internally 0.5/num_trees set . b_leaf_tau Scale parameter IG(a_leaf, b_leaf) leaf node parameter variance model treatment effect forest. Calibrated internally 0.5/num_trees set . q Quantile used calibrated lambda Sparapani et al (2021). Default: 0.9. sigma2 Starting value global variance parameter. Calibrated internally Sparapani et al (2021) set . num_trees_mu Number trees prognostic forest. Default: 200. num_trees_tau Number trees treatment effect forest. Default: 50. num_gfr Number \"warm-start\" iterations run using grow--root algorithm (Hahn, 2021). Default: 5. num_burnin Number \"burn-\" iterations MCMC sampler. Default: 0. num_mcmc Number \"retained\" iterations MCMC sampler. Default: 100. sample_sigma_global Whether update sigma^2 global error variance parameter based IG(nu, nu*lambda). Default: T. sample_sigma_leaf_mu Whether update sigma_leaf_mu leaf scale variance parameter prognostic forest based IG(a_leaf_mu, b_leaf_mu). Default: T. sample_sigma_leaf_tau Whether update sigma_leaf_tau leaf scale variance parameter treatment effect forest based IG(a_leaf_tau, b_leaf_tau). Default: T. propensity_covariate Whether include propensity score covariate either forests. Enter \"none\" neither, \"mu\" prognostic forest, \"tau\" treatment forest, \"\" forests. \"none\" propensity score provided, estimated (X_train, Z_train) using xgboost. Default: \"mu\". adaptive_coding Whether use \"adaptive coding\" scheme binary treatment variable coded manually (0,1) (-1,1) learned via parameters b_0 b_1 attach outcome model [b_0 (1-Z) + b_1 Z] tau(X). ignored Z binary. Default: T. b_0 Initial value \"control\" group coding parameter. ignored Z binary. Default: -0.5. b_1 Initial value \"treatment\" group coding parameter. ignored Z binary. Default: 0.5. random_seed Integer parameterizing C++ random number generator. 
specified, C++ random number generator seeded according std::random_device.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/bcf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Run the Bayesian Causal Forest (BCF) algorithm for regularized causal effect estimation. — bcf","text":"List sampling outputs wrapper around sampled forests (can used -memory prediction new data, serialized JSON disk).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/bcf.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Run the Bayesian Causal Forest (BCF) algorithm for regularized causal effect estimation. — bcf","text":"","code":"n <- 500 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) g <- function(x) {ifelse(x[,5]==1,2,ifelse(x[,5]==2,-1,4))} mu1 <- function(x) {1+g(x)+x[,1]*x[,3]} mu2 <- function(x) {1+g(x)+6*abs(x[,3]-1)} tau1 <- function(x) {rep(3,nrow(x))} tau2 <- function(x) {1+2*x[,2]*x[,4]} mu_x <- mu1(X) tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 4 y <- E_XZ + rnorm(n, 0, 1)*(sd(E_XZ)/snr) test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test) # 
plot(rowMeans(bcf_model$mu_hat_test), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") # abline(0,1,col=\"red\",lty=3,lwd=3) # plot(rowMeans(bcf_model$tau_hat_test), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") # abline(0,1,col=\"red\",lty=3,lwd=3)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/computeForestKernels.html","id":null,"dir":"Reference","previous_headings":"","what":"Compute a kernel from a tree ensemble, defined by the fraction of trees of an ensemble in which two observations fall into the same leaf. — computeForestKernels","title":"Compute a kernel from a tree ensemble, defined by the fraction of trees of an ensemble in which two observations fall into the same leaf. — computeForestKernels","text":"Compute kernel tree ensemble, defined fraction trees ensemble two observations fall leaf.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/computeForestKernels.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compute a kernel from a tree ensemble, defined by the fraction of trees of an ensemble in which two observations fall into the same leaf. — computeForestKernels","text":"","code":"computeForestKernels(bart_model, X_train, X_test = NULL, forest_num = NULL)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/computeForestKernels.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compute a kernel from a tree ensemble, defined by the fraction of trees of an ensemble in which two observations fall into the same leaf. — computeForestKernels","text":"bart_model Object type bartmodel corresponding BART model least one sample X_train Matrix \"training\" data. traditional Gaussian process kriging context, corresponds observations outcomes observed. X_test (Optional) Matrix \"test\" data. 
traditional Gaussian process kriging context, corresponds observations outcomes unobserved must estimated based kernels k(X_test,X_test), k(X_test,X_train), k(X_train,X_train). provided, function compute k(X_train, X_train). forest_num (Option) Index forest sample use kernel computation. provided, function use last forest.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/computeForestKernels.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compute a kernel from a tree ensemble, defined by the fraction of trees of an ensemble in which two observations fall into the same leaf. — computeForestKernels","text":"List kernel matrices. X_test = NULL, list contains one n_train x n_train matrix, n_train = nrow(X_train). matrix kernel defined W_train %*% t(W_train) W_train matrix n_train rows many columns total leaves ensemble. X_test NULL, list contains two matrices defined W_test %*% t(W_train) W_test %*% t(W_test).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/computeForestLeafIndices.html","id":null,"dir":"Reference","previous_headings":"","what":"Compute and return a vector representation of a forest's leaf predictions for every observation in a dataset. The vector has a ","title":"Compute and return a vector representation of a forest's leaf predictions for every observation in a dataset. The vector has a ","text":"Compute return vector representation forest's leaf predictions every observation dataset. vector \"column-major\" format can easily re-represented CSC sparse matrix: elements organized first n elements correspond leaf predictions n observations dataset first tree ensemble, next n elements correspond predictions second tree . \"data\" element corresponds uniquely mapped column index corresponds single leaf single tree (.e. tree 1 3 leaves, column indices range 0 2, tree 2's leaf indices begin 3, etc...). 
Users may pass single dataset (refer \"training set\") two datasets (refer \"training test sets\"). verbiage hints one potential use-case matrix leaf indices define ensemble-based kernel kriging.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/computeForestLeafIndices.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compute and return a vector representation of a forest's leaf predictions for every observation in a dataset. The vector has a ","text":"","code":"computeForestLeafIndices(bart_model, X_train, X_test = NULL, forest_num = NULL)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/computeForestLeafIndices.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compute and return a vector representation of a forest's leaf predictions for every observation in a dataset. The vector has a ","text":"bart_model Object type bartmodel corresponding BART model least one sample X_train Matrix \"training\" data. traditional Gaussian process kriging context, corresponds observations outcomes observed. X_test (Optional) Matrix \"test\" data. traditional Gaussian process kriging context, corresponds observations outcomes unobserved must estimated based kernels k(X_test,X_test), k(X_test,X_train), k(X_train,X_train). provided, function compute k(X_train, X_train). forest_num (Option) Index forest sample use kernel computation. provided, function use last forest.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/computeForestLeafIndices.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compute and return a vector representation of a forest's leaf predictions for every observation in a dataset. The vector has a ","text":"List vectors. X_test = NULL, list contains one vector length n_train * num_trees, n_train = nrow(X_train) num_trees number trees bart_model. 
X_test NULL, list contains another vector length n_test * num_trees.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestContainer.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a container of forest samples — createForestContainer","title":"Create a container of forest samples — createForestContainer","text":"Create container forest samples","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestContainer.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a container of forest samples — createForestContainer","text":"","code":"createForestContainer(num_trees, output_dimension = 1, is_leaf_constant = F)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestContainer.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a container of forest samples — createForestContainer","text":"num_trees Number trees output_dimension Dimensionality outcome model is_leaf_constant Whether leaf constant","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestContainer.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a container of forest samples — createForestContainer","text":"ForestSamples object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestDataset.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a forest dataset object — createForestDataset","title":"Create a forest dataset object — createForestDataset","text":"Create forest dataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestDataset.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a forest dataset object — createForestDataset","text":"","code":"createForestDataset(covariates, basis = NULL, 
variance_weights = NULL)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestDataset.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a forest dataset object — createForestDataset","text":"covariates Matrix covariates basis (Optional) Matrix bases used define leaf regression variance_weights (Optional) Vector observation-specific variance weights","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestDataset.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a forest dataset object — createForestDataset","text":"ForestDataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestKernel.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a ForestKernel object — createForestKernel","title":"Create a ForestKernel object — createForestKernel","text":"Create ForestKernel object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestKernel.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a ForestKernel object — createForestKernel","text":"","code":"createForestKernel()"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestKernel.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a ForestKernel object — createForestKernel","text":"ForestKernel object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestModel.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a forest model object — createForestModel","title":"Create a forest model object — createForestModel","text":"Create forest model 
object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestModel.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a forest model object — createForestModel","text":"","code":"createForestModel( forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestModel.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a forest model object — createForestModel","text":"forest_dataset ForestDataset object, used initialize forest sampling data structures feature_types Feature types (integers 0 = numeric, 1 = ordered categorical, 2 = unordered categorical) num_trees Number trees forest sampled n Number observations forest_dataset alpha Root node split probability tree prior beta Depth prior penalty tree prior min_samples_leaf Minimum number samples tree leaf","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createForestModel.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a forest model object — createForestModel","text":"ForestModel object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createOutcome.html","id":null,"dir":"Reference","previous_headings":"","what":"Create an outcome object — createOutcome","title":"Create an outcome object — createOutcome","text":"Create outcome object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createOutcome.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create an outcome object — createOutcome","text":"","code":"createOutcome(outcome)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createOutcome.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create an outcome object — 
createOutcome","text":"outcome Vector outcome values","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createOutcome.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create an outcome object — createOutcome","text":"Outcome object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRNG.html","id":null,"dir":"Reference","previous_headings":"","what":"Create an R class that wraps a C++ random number generator — createRNG","title":"Create an R class that wraps a C++ random number generator — createRNG","text":"Create R class wraps C++ random number generator","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRNG.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create an R class that wraps a C++ random number generator — createRNG","text":"","code":"createRNG(random_seed = -1)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRNG.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create an R class that wraps a C++ random number generator — createRNG","text":"random_seed (Optional) random seed sampling","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRNG.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create an R class that wraps a C++ random number generator — createRNG","text":"CppRng object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectSamples.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a RandomEffectSamples object — createRandomEffectSamples","title":"Create a RandomEffectSamples object — createRandomEffectSamples","text":"Create RandomEffectSamples 
object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectSamples.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a RandomEffectSamples object — createRandomEffectSamples","text":"","code":"createRandomEffectSamples(num_components, num_groups, random_effects_tracker)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectSamples.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a RandomEffectSamples object — createRandomEffectSamples","text":"num_components Number \"components\" bases defining random effects regression num_groups Number random effects groups random_effects_tracker Object type RandomEffectsTracker","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectSamples.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a RandomEffectSamples object — createRandomEffectSamples","text":"RandomEffectSamples object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsDataset.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a random effects dataset object — createRandomEffectsDataset","title":"Create a random effects dataset object — createRandomEffectsDataset","text":"Create random effects dataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsDataset.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a random effects dataset object — createRandomEffectsDataset","text":"","code":"createRandomEffectsDataset(group_labels, basis, variance_weights = NULL)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsDataset.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a random effects 
dataset object — createRandomEffectsDataset","text":"group_labels Vector group labels basis Matrix bases used define random effects regression (intercept-model, pass array ones) variance_weights (Optional) Vector observation-specific variance weights","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsDataset.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a random effects dataset object — createRandomEffectsDataset","text":"RandomEffectsDataset object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsModel.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a RandomEffectsModel object — createRandomEffectsModel","title":"Create a RandomEffectsModel object — createRandomEffectsModel","text":"Create RandomEffectsModel object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsModel.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a RandomEffectsModel object — createRandomEffectsModel","text":"","code":"createRandomEffectsModel(num_components, num_groups)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsModel.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a RandomEffectsModel object — createRandomEffectsModel","text":"num_components Number \"components\" bases defining random effects regression num_groups Number random effects groups","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsModel.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a RandomEffectsModel object — createRandomEffectsModel","text":"RandomEffectsModel 
object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsTracker.html","id":null,"dir":"Reference","previous_headings":"","what":"Create a RandomEffectsTracker object — createRandomEffectsTracker","title":"Create a RandomEffectsTracker object — createRandomEffectsTracker","text":"Create RandomEffectsTracker object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsTracker.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Create a RandomEffectsTracker object — createRandomEffectsTracker","text":"","code":"createRandomEffectsTracker(rfx_group_indices)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsTracker.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Create a RandomEffectsTracker object — createRandomEffectsTracker","text":"rfx_group_indices Integer indices indicating groups used define random effects","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/createRandomEffectsTracker.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Create a RandomEffectsTracker object — createRandomEffectsTracker","text":"RandomEffectsTracker object","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bartmodel.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bartmodel","title":"Extract raw sample values for each of the random effect parameter terms. 
— getRandomEffectSamples.bartmodel","text":"Extract raw sample values random effect parameter terms.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bartmodel.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bartmodel","text":"","code":"# S3 method for bartmodel getRandomEffectSamples(object, ...)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bartmodel.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bartmodel","text":"object Object type bcf containing draws Bayesian causal forest model associated sampling outputs.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bartmodel.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bartmodel","text":"List arrays. alpha array dimension (num_components, num_samples) simply vector num_components = 1. xi beta arrays dimension (num_components, num_groups, num_samples) simply matrix num_components = 1. sigma array dimension (num_components, num_samples) simply vector num_components = 1.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bartmodel.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract raw sample values for each of the random effect parameter terms. 
— getRandomEffectSamples.bartmodel","text":"","code":"n <- 100 p <- 5 X <- matrix(runif(n*p), ncol = p) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) ) snr <- 3 group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1),nrow=2,byrow=T) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) E_y <- f_XW + rfx_term y <- E_y + rnorm(n, 0, 1)*(sd(E_y)/snr) test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] group_ids_test <- group_ids[test_inds] group_ids_train <- group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] rfx_term_train <- rfx_term[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train, group_ids_train = group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, group_ids_test = group_ids_test, rfx_basis_test = rfx_basis_test, num_gfr = 100, num_burnin = 0, num_mcmc = 100, sample_tau = T) rfx_samples <- getRandomEffectSamples(bart_model)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bcf.html","id":null,"dir":"Reference","previous_headings":"","what":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bcf","title":"Extract raw sample values for each of the random effect parameter terms. 
— getRandomEffectSamples.bcf","text":"Extract raw sample values random effect parameter terms.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bcf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bcf","text":"","code":"# S3 method for bcf getRandomEffectSamples(object, ...)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bcf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bcf","text":"object Object type bcf containing draws Bayesian causal forest model associated sampling outputs.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bcf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Extract raw sample values for each of the random effect parameter terms. — getRandomEffectSamples.bcf","text":"List arrays. alpha array dimension (num_components, num_samples) simply vector num_components = 1. xi beta arrays dimension (num_components, num_groups, num_samples) simply matrix num_components = 1. sigma array dimension (num_components, num_samples) simply vector num_components = 1.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.bcf.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Extract raw sample values for each of the random effect parameter terms. 
— getRandomEffectSamples.bcf","text":"","code":"n <- 500 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) g <- function(x) {ifelse(x[,5]==1,2,ifelse(x[,5]==2,-1,4))} mu1 <- function(x) {1+g(x)+x[,1]*x[,3]} mu2 <- function(x) {1+g(x)+6*abs(x[,3]-1)} tau1 <- function(x) {rep(3,nrow(x))} tau2 <- function(x) {1+2*x[,2]*x[,4]} mu_x <- mu1(X) tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 3 group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1),nrow=2,byrow=T) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] group_ids_test <- group_ids[test_inds] group_ids_train <- group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] rfx_term_train <- rfx_term[train_inds] bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, group_ids_train = group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, rfx_basis_test = rfx_basis_test, feature_types = c(0,0,0,1,1), num_gfr = 100, num_burnin = 0, num_mcmc = 100, sample_sigma_leaf_mu = T, sample_sigma_leaf_tau = F) rfx_samples <- 
getRandomEffectSamples(bcf_model)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.html","id":null,"dir":"Reference","previous_headings":"","what":"Generic function for extracting random effect samples from a model object (BCF, BART, etc...) — getRandomEffectSamples","title":"Generic function for extracting random effect samples from a model object (BCF, BART, etc...) — getRandomEffectSamples","text":"Generic function extracting random effect samples model object (BCF, BART, etc...)","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Generic function for extracting random effect samples from a model object (BCF, BART, etc...) — getRandomEffectSamples","text":"","code":"getRandomEffectSamples(object, ...)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/getRandomEffectSamples.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Generic function for extracting random effect samples from a model object (BCF, BART, etc...) 
— getRandomEffectSamples","text":"List random effect samples","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bartmodel.html","id":null,"dir":"Reference","previous_headings":"","what":"Predict from a sampled BART model on new data — predict.bartmodel","title":"Predict from a sampled BART model on new data — predict.bartmodel","text":"Predict sampled BART model new data","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bartmodel.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Predict from a sampled BART model on new data — predict.bartmodel","text":"","code":"# S3 method for bartmodel predict( bart, X_test, W_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL )"},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bartmodel.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Predict from a sampled BART model on new data — predict.bartmodel","text":"bart Object type bart containing draws regression forest associated sampling outputs. X_test Covariates used determine tree leaf predictions observation. W_test (Optional) Bases used prediction (e.g. dot product leaf values). Default: NULL. group_ids_test (Optional) Test set group labels used additive random effects model. currently support (plan near future), test set evaluation group labels training set. rfx_basis_test (Optional) Test set basis \"random-slope\" regression additive random effects model.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bartmodel.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Predict from a sampled BART model on new data — predict.bartmodel","text":"List prediction matrices. model random effects, list one element -- predictions forest. 
model random effects, list three elements -- forest predictions, random effects predictions, sum (y_hat).","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bartmodel.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Predict from a sampled BART model on new data — predict.bartmodel","text":"","code":"n <- 100 p <- 5 X <- matrix(runif(n*p), ncol = p) f_XW <- ( ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5) ) noise_sd <- 1 y <- f_XW + rnorm(n, 0, noise_sd) test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train, leaf_model = 0) yhat_test <- predict(bart_model, X_test) #> Error in (bart$model_params$has_rfx_basis) && (is.null(rfx_basis_test)): invalid 'x' type in 'x && y' # plot(rowMeans(yhat_test), y_test, xlab = \"predicted\", ylab = \"actual\") # abline(0,1,col=\"red\",lty=3,lwd=3)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bcf.html","id":null,"dir":"Reference","previous_headings":"","what":"Predict from a sampled BCF model on new data — predict.bcf","title":"Predict from a sampled BCF model on new data — predict.bcf","text":"Predict sampled BCF model new data","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bcf.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Predict from a sampled BCF model on new data — predict.bcf","text":"","code":"# S3 method for bcf predict( bcf, X_test, Z_test, pi_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL 
)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bcf.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Predict from a sampled BCF model on new data — predict.bcf","text":"bcf Object type bcf containing draws Bayesian causal forest model associated sampling outputs. X_test Covariates used determine tree leaf predictions observation. Z_test Treatments used prediction. pi_test (Optional) Propensities used prediction. group_ids_test (Optional) Test set group labels used additive random effects model. currently support (plan near future), test set evaluation group labels training set. rfx_basis_test (Optional) Test set basis \"random-slope\" regression additive random effects model.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bcf.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Predict from a sampled BCF model on new data — predict.bcf","text":"List three (four) nrow(X_test) bcf$num_samples matrices: prognostic function estimates, treatment effect estimates, (possibly) random effects predictions, outcome predictions.","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/predict.bcf.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Predict from a sampled BCF model on new data — predict.bcf","text":"","code":"n <- 500 x1 <- rnorm(n) x2 <- rnorm(n) x3 <- rnorm(n) x4 <- as.numeric(rbinom(n,1,0.5)) x5 <- as.numeric(sample(1:3,n,replace=T)) X <- cbind(x1,x2,x3,x4,x5) p <- ncol(X) g <- function(x) {ifelse(x[,5]==1,2,ifelse(x[,5]==2,-1,4))} mu1 <- function(x) {1+g(x)+x[,1]*x[,3]} mu2 <- function(x) {1+g(x)+6*abs(x[,3]-1)} tau1 <- function(x) {rep(3,nrow(x))} tau2 <- function(x) {1+2*x[,2]*x[,4]} mu_x <- mu1(X) tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 4 y <- E_XZ + rnorm(n, 0, 
1)*(sd(E_XZ)/snr) test_set_pct <- 0.2 n_test <- round(test_set_pct*n) n_train <- n - n_test test_inds <- sort(sample(1:n, n_test, replace = F)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- X[test_inds,] X_train <- X[train_inds,] pi_test <- pi_x[test_inds] pi_train <- pi_x[train_inds] Z_test <- Z[test_inds] Z_train <- Z[train_inds] y_test <- y[test_inds] y_train <- y[train_inds] mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train) preds <- predict(bcf_model, X_test, Z_test, pi_test) #> Error in (bcf$model_params$has_rfx_basis) && (is.null(rfx_basis_test)): invalid 'x' type in 'x && y' # plot(rowMeans(preds$mu_hat), mu_test, xlab = \"predicted\", ylab = \"actual\", main = \"Prognostic function\") # abline(0,1,col=\"red\",lty=3,lwd=3) # plot(rowMeans(preds$tau_hat), tau_test, xlab = \"predicted\", ylab = \"actual\", main = \"Treatment effect\") # abline(0,1,col=\"red\",lty=3,lwd=3)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/sample_sigma2_one_iteration.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample one iteration of the global variance model — sample_sigma2_one_iteration","title":"Sample one iteration of the global variance model — sample_sigma2_one_iteration","text":"Sample one iteration global variance model","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/sample_sigma2_one_iteration.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample one iteration of the global variance model — sample_sigma2_one_iteration","text":"","code":"sample_sigma2_one_iteration(residual, rng, nu, lambda)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/sample_sigma2_one_iteration.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample one iteration of 
the global variance model — sample_sigma2_one_iteration","text":"residual Outcome class rng C++ random number generator nu Global variance shape parameter lambda Constitutes scale parameter global variance along nu (.e. scale nu*lambda)","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/sample_tau_one_iteration.html","id":null,"dir":"Reference","previous_headings":"","what":"Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!) — sample_tau_one_iteration","title":"Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!) — sample_tau_one_iteration","text":"Sample one iteration leaf parameter variance model (univariate basis constant leaf!)","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/sample_tau_one_iteration.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!) — sample_tau_one_iteration","text":"","code":"sample_tau_one_iteration(forest_samples, rng, a, b, sample_num)"},{"path":"https://stochastictree.github.io/stochtree-r/reference/sample_tau_one_iteration.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!) 
— sample_tau_one_iteration","text":"forest_samples Container forest samples rng C++ random number generator Leaf variance shape parameter b Leaf variance scale parameter sample_num Sample index","code":""},{"path":"https://stochastictree.github.io/stochtree-r/reference/stochtree-package.html","id":null,"dir":"Reference","previous_headings":"","what":"stochtree: Stochastic tree ensembles (XBART and BART) for supervised learning and causal inference — stochtree-package","title":"stochtree: Stochastic tree ensembles (XBART and BART) for supervised learning and causal inference — stochtree-package","text":"Stochastic tree ensembles (XBART BART) supervised learning causal inference","code":""},{"path":[]},{"path":"https://stochastictree.github.io/stochtree-r/reference/stochtree-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"stochtree: Stochastic tree ensembles (XBART and BART) for supervised learning and causal inference — stochtree-package","text":"Maintainer: Drew Herren drewherrenopensource@gmail.com (ORCID) Authors: Richard Hahn Jared Murray Carlos Carvalho Jingyu ","code":""}] diff --git a/sitemap.xml b/sitemap.xml index e14f7a3..c5c51c2 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -15,6 +15,9 @@ https://stochastictree.github.io/stochtree-r/articles/Causal-Inference.html + + https://stochastictree.github.io/stochtree-r/articles/Ensemble-Kernel.html + https://stochastictree.github.io/stochtree-r/articles/Prototype-Interface.html @@ -36,6 +39,9 @@ https://stochastictree.github.io/stochtree-r/reference/ForestDataset.html + + https://stochastictree.github.io/stochtree-r/reference/ForestKernel.html + https://stochastictree.github.io/stochtree-r/reference/ForestModel.html @@ -60,12 +66,21 @@ https://stochastictree.github.io/stochtree-r/reference/bcf.html + + https://stochastictree.github.io/stochtree-r/reference/computeForestKernels.html + + + 
https://stochastictree.github.io/stochtree-r/reference/computeForestLeafIndices.html + https://stochastictree.github.io/stochtree-r/reference/createForestContainer.html https://stochastictree.github.io/stochtree-r/reference/createForestDataset.html + + https://stochastictree.github.io/stochtree-r/reference/createForestKernel.html + https://stochastictree.github.io/stochtree-r/reference/createForestModel.html